import pandas as pd
import numpy as np
import plotly.graph_objs as go
import plotly.offline as offline
import itertools
from sklearn.metrics import mean_squared_error
from math import sqrt
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import statsmodels.api as sm
import matplotlib
import warnings
warnings.filterwarnings("ignore")
from statsmodels.tsa.stattools import adfuller
offline.init_notebook_mode()
xls=pd.ExcelFile("PET_PRI_SPT_S1_M.xls")
df_crudeOil=pd.read_excel(xls,"Data 1")
df_cGasoline=pd.read_excel(xls,"Data 2")
df_rGasoline=pd.read_excel(xls,"Data 3")
df_heatOil=pd.read_excel(xls,"Data 4")
df_diesel=pd.read_excel(xls,"Data 5")
df_jet=pd.read_excel(xls,"Data 6")
df_prop=pd.read_excel(xls,"Data 7")
def cleanData(data):
data.columns=data.iloc[1,:]
data=data.iloc[2:,:]
return data
#Crude Oil Dataset
df_crudeOil=cleanData(df_crudeOil)
df_crudeOil.head()
| 1 | Date | Cushing, OK WTI Spot Price FOB (Dollars per Barrel) | Europe Brent Spot Price FOB (Dollars per Barrel) |
|---|---|---|---|
| 2 | 1986-01-15 00:00:00 | 22.93 | NaN |
| 3 | 1986-02-15 00:00:00 | 15.46 | NaN |
| 4 | 1986-03-15 00:00:00 | 12.61 | NaN |
| 5 | 1986-04-15 00:00:00 | 12.84 | NaN |
| 6 | 1986-05-15 00:00:00 | 15.38 | NaN |
#C Gasoline Dataset
df_cGasoline=cleanData(df_cGasoline)
df_cGasoline.head()
| 1 | Date | New York Harbor Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon) | U.S. Gulf Coast Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon) |
|---|---|---|---|
| 2 | 1986-06-15 00:00:00 | 0.42 | 0.409 |
| 3 | 1986-07-15 00:00:00 | 0.34 | 0.334 |
| 4 | 1986-08-15 00:00:00 | 0.426 | 0.416 |
| 5 | 1986-09-15 00:00:00 | 0.42 | 0.398 |
| 6 | 1986-10-15 00:00:00 | 0.41 | 0.4 |
#R Gasoline Dataset
df_rGasoline=cleanData(df_rGasoline)
df_rGasoline.head()
| 1 | Date | Los Angeles Reformulated RBOB Regular Gasoline Spot Price (Dollars per Gallon) |
|---|---|---|
| 2 | 2003-06-15 00:00:00 | 1.072 |
| 3 | 2003-07-15 00:00:00 | 0.965 |
| 4 | 2003-08-15 00:00:00 | 1.315 |
| 5 | 2003-09-15 00:00:00 | 0.949 |
| 6 | 2003-10-15 00:00:00 | 0.996 |
#Heat Oil Dataset
df_heatOil=cleanData(df_heatOil)
df_heatOil.head()
| 1 | Date | New York Harbor No. 2 Heating Oil Spot Price FOB (Dollars per Gallon) |
|---|---|---|
| 2 | 1986-06-15 00:00:00 | 0.38 |
| 3 | 1986-07-15 00:00:00 | 0.334 |
| 4 | 1986-08-15 00:00:00 | 0.408 |
| 5 | 1986-09-15 00:00:00 | 0.402 |
| 6 | 1986-10-15 00:00:00 | 0.394 |
#Diesel Dataset
df_diesel=cleanData(df_diesel)
df_diesel.head()
| 1 | Date | New York Harbor Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon) | U.S. Gulf Coast Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon) | Los Angeles, CA Ultra-Low Sulfur CARB Diesel Spot Price (Dollars per Gallon) |
|---|---|---|---|---|
| 2 | 1996-04-15 00:00:00 | NaN | NaN | 0.95 |
| 3 | 1996-05-15 00:00:00 | NaN | NaN | 0.87 |
| 4 | 1996-06-15 00:00:00 | NaN | NaN | 0.745 |
| 5 | 1996-07-15 00:00:00 | NaN | NaN | 0.692 |
| 6 | 1996-08-15 00:00:00 | NaN | NaN | 0.705 |
#Jet Fuel Dataset
df_jet=cleanData(df_jet)
df_jet.head()
| 1 | Date | U.S. Gulf Coast Kerosene-Type Jet Fuel Spot Price FOB (Dollars per Gallon) |
|---|---|---|
| 2 | 1990-04-15 00:00:00 | 0.54 |
| 3 | 1990-05-15 00:00:00 | 0.515 |
| 4 | 1990-06-15 00:00:00 | 0.494 |
| 5 | 1990-07-15 00:00:00 | 0.535 |
| 6 | 1990-08-15 00:00:00 | 0.791 |
#Propane Dataset
df_prop=cleanData(df_prop)
df_prop.head()
| 1 | Date | Mont Belvieu, TX Propane Spot Price FOB (Dollars per Gallon) |
|---|---|---|
| 2 | 1992-06-15 00:00:00 | 0.344 |
| 3 | 1992-07-15 00:00:00 | 0.342 |
| 4 | 1992-08-15 00:00:00 | 0.354 |
| 5 | 1992-09-15 00:00:00 | 0.375 |
| 6 | 1992-10-15 00:00:00 | 0.354 |
#Crude Oil
df_crudeOil['Cushing, OK WTI Monthly Change']=df_crudeOil['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)'].pct_change()*100
df_crudeOil['Europe Brent Monthly Change']=df_crudeOil['Europe Brent Spot Price FOB (Dollars per Barrel)'].pct_change()*100
df_crudeOil['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)']=df_crudeOil['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)'].astype('float')
df_crudeOil['Europe Brent Spot Price FOB (Dollars per Barrel)']=df_crudeOil['Europe Brent Spot Price FOB (Dollars per Barrel)'].astype('float')
df_crudeOil.Date = pd.to_datetime(df_crudeOil.Date)
df_crudeOil.set_index('Date', inplace=True)
df_crudeOil
| 1 | Cushing, OK WTI Spot Price FOB (Dollars per Barrel) | Europe Brent Spot Price FOB (Dollars per Barrel) | Cushing, OK WTI Monthly Change | Europe Brent Monthly Change |
|---|---|---|---|---|
| Date | ||||
| 1986-01-15 | 22.93 | NaN | NaN | NaN |
| 1986-02-15 | 15.46 | NaN | -32.577410 | NaN |
| 1986-03-15 | 12.61 | NaN | -18.434670 | NaN |
| 1986-04-15 | 12.84 | NaN | 1.823949 | NaN |
| 1986-05-15 | 15.38 | NaN | 19.781931 | NaN |
| 1986-06-15 | 13.43 | NaN | -12.678804 | NaN |
| 1986-07-15 | 11.59 | NaN | -13.700670 | NaN |
| 1986-08-15 | 15.10 | NaN | 30.284728 | NaN |
| 1986-09-15 | 14.87 | NaN | -1.523179 | NaN |
| 1986-10-15 | 14.90 | NaN | 0.201748 | NaN |
| 1986-11-15 | 15.22 | NaN | 2.147651 | NaN |
| 1986-12-15 | 16.11 | NaN | 5.847569 | NaN |
| 1987-01-15 | 18.65 | NaN | 15.766605 | NaN |
| 1987-02-15 | 17.75 | NaN | -4.825737 | NaN |
| 1987-03-15 | 18.30 | NaN | 3.098592 | NaN |
| 1987-04-15 | 18.68 | NaN | 2.076503 | NaN |
| 1987-05-15 | 19.44 | 18.58 | 4.068522 | NaN |
| 1987-06-15 | 20.07 | 18.86 | 3.240741 | 1.506997 |
| 1987-07-15 | 21.34 | 19.86 | 6.327853 | 5.302227 |
| 1987-08-15 | 20.31 | 18.98 | -4.826617 | -4.431017 |
| 1987-09-15 | 19.53 | 18.31 | -3.840473 | -3.530032 |
| 1987-10-15 | 19.86 | 18.76 | 1.689708 | 2.457673 |
| 1987-11-15 | 18.85 | 17.78 | -5.085599 | -5.223881 |
| 1987-12-15 | 17.28 | 17.05 | -8.328912 | -4.105737 |
| 1988-01-15 | 17.13 | 16.75 | -0.868056 | -1.759531 |
| 1988-02-15 | 16.80 | 15.73 | -1.926445 | -6.089552 |
| 1988-03-15 | 16.20 | 14.73 | -3.571429 | -6.357279 |
| 1988-04-15 | 17.86 | 16.60 | 10.246914 | 12.695180 |
| 1988-05-15 | 17.42 | 16.31 | -2.463606 | -1.746988 |
| 1988-06-15 | 16.53 | 15.54 | -5.109070 | -4.721030 |
| ... | ... | ... | ... | ... |
| 2016-08-15 | 44.72 | 45.84 | 0.156775 | 1.979978 |
| 2016-09-15 | 45.18 | 46.57 | 1.028623 | 1.592496 |
| 2016-10-15 | 49.78 | 49.52 | 10.181496 | 6.334550 |
| 2016-11-15 | 45.66 | 44.73 | -8.276416 | -9.672859 |
| 2016-12-15 | 51.97 | 53.31 | 13.819536 | 19.181757 |
| 2017-01-15 | 52.50 | 54.58 | 1.019819 | 2.382292 |
| 2017-02-15 | 53.47 | 54.87 | 1.847619 | 0.531330 |
| 2017-03-15 | 49.33 | 51.59 | -7.742659 | -5.977766 |
| 2017-04-15 | 51.06 | 52.31 | 3.506994 | 1.395619 |
| 2017-05-15 | 48.48 | 50.33 | -5.052879 | -3.785127 |
| 2017-06-15 | 45.18 | 46.37 | -6.806931 | -7.868071 |
| 2017-07-15 | 46.63 | 48.48 | 3.209385 | 4.550356 |
| 2017-08-15 | 48.04 | 51.70 | 3.023804 | 6.641914 |
| 2017-09-15 | 49.82 | 56.15 | 3.705246 | 8.607350 |
| 2017-10-15 | 51.58 | 57.51 | 3.532718 | 2.422084 |
| 2017-11-15 | 56.64 | 62.71 | 9.810004 | 9.041906 |
| 2017-12-15 | 57.88 | 64.37 | 2.189266 | 2.647106 |
| 2018-01-15 | 63.70 | 69.08 | 10.055287 | 7.317073 |
| 2018-02-15 | 62.23 | 65.32 | -2.307692 | -5.442965 |
| 2018-03-15 | 62.73 | 66.02 | 0.803471 | 1.071647 |
| 2018-04-15 | 66.25 | 72.11 | 5.611350 | 9.224477 |
| 2018-05-15 | 69.98 | 76.98 | 5.630189 | 6.753571 |
| 2018-06-15 | 67.87 | 74.41 | -3.015147 | -3.338529 |
| 2018-07-15 | 70.98 | 74.25 | 4.582290 | -0.215025 |
| 2018-08-15 | 68.06 | 72.53 | -4.113835 | -2.316498 |
| 2018-09-15 | 70.23 | 78.89 | 3.188363 | 8.768785 |
| 2018-10-15 | 70.75 | 81.03 | 0.740424 | 2.712638 |
| 2018-11-15 | 56.96 | 64.75 | -19.491166 | -20.091324 |
| 2018-12-15 | 49.52 | 57.36 | -13.061798 | -11.413127 |
| 2019-01-15 | 51.38 | 59.41 | 3.756058 | 3.573919 |
397 rows × 4 columns
#Converntional Gasoline
df_cGasoline['New York Harbor Conventional Gasoline Monthly Change']=df_cGasoline['New York Harbor Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)'].pct_change()*100
df_cGasoline['U.S. Gulf Coast Conventional Gasoline Monthly Change']=df_cGasoline['U.S. Gulf Coast Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)'].pct_change()*100
df_cGasoline['New York Harbor Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)']=df_cGasoline['New York Harbor Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)'].astype('float')
df_cGasoline['U.S. Gulf Coast Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)']=df_cGasoline['U.S. Gulf Coast Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)'].astype('float')
df_cGasoline.Date = pd.to_datetime(df_cGasoline.Date)
df_cGasoline.set_index('Date', inplace=True)
df_cGasoline
| 1 | New York Harbor Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon) | U.S. Gulf Coast Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon) | New York Harbor Conventional Gasoline Monthly Change | U.S. Gulf Coast Conventional Gasoline Monthly Change |
|---|---|---|---|---|
| Date | ||||
| 1986-06-15 | 0.420 | 0.409 | NaN | NaN |
| 1986-07-15 | 0.340 | 0.334 | -19.047619 | -18.337408 |
| 1986-08-15 | 0.426 | 0.416 | 25.294118 | 24.550898 |
| 1986-09-15 | 0.420 | 0.398 | -1.408451 | -4.326923 |
| 1986-10-15 | 0.410 | 0.400 | -2.380952 | 0.502513 |
| 1986-11-15 | 0.411 | 0.409 | 0.243902 | 2.250000 |
| 1986-12-15 | 0.434 | 0.414 | 5.596107 | 1.222494 |
| 1987-01-15 | 0.489 | 0.492 | 12.672811 | 18.840580 |
| 1987-02-15 | 0.474 | 0.481 | -3.067485 | -2.235772 |
| 1987-03-15 | 0.509 | 0.506 | 7.383966 | 5.197505 |
| 1987-04-15 | 0.504 | 0.505 | -0.982318 | -0.197628 |
| 1987-05-15 | 0.542 | 0.528 | 7.539683 | 4.554455 |
| 1987-06-15 | 0.557 | 0.535 | 2.767528 | 1.325758 |
| 1987-07-15 | 0.556 | 0.552 | -0.179533 | 3.177570 |
| 1987-08-15 | 0.523 | 0.530 | -5.935252 | -3.985507 |
| 1987-09-15 | 0.518 | 0.496 | -0.956023 | -6.415094 |
| 1987-10-15 | 0.541 | 0.510 | 4.440154 | 2.822581 |
| 1987-11-15 | 0.516 | 0.497 | -4.621072 | -2.549020 |
| 1987-12-15 | 0.454 | 0.432 | -12.015504 | -13.078471 |
| 1988-01-15 | 0.449 | 0.433 | -1.101322 | 0.231481 |
| 1988-02-15 | 0.461 | 0.452 | 2.672606 | 4.387991 |
| 1988-03-15 | 0.452 | 0.456 | -1.952278 | 0.884956 |
| 1988-04-15 | 0.507 | 0.511 | 12.168142 | 12.061404 |
| 1988-05-15 | 0.522 | 0.510 | 2.958580 | -0.195695 |
| 1988-06-15 | 0.518 | 0.498 | -0.766284 | -2.352941 |
| 1988-07-15 | 0.554 | 0.557 | 6.949807 | 11.847390 |
| 1988-08-15 | 0.483 | 0.482 | -12.815884 | -13.464991 |
| 1988-09-15 | 0.466 | 0.470 | -3.519669 | -2.489627 |
| 1988-10-15 | 0.511 | 0.498 | 9.656652 | 5.957447 |
| 1988-11-15 | 0.521 | 0.475 | 1.956947 | -4.618474 |
| ... | ... | ... | ... | ... |
| 2016-08-15 | 1.379 | 1.429 | 1.846381 | 5.228277 |
| 2016-09-15 | 1.438 | 1.393 | 4.278463 | -2.519244 |
| 2016-10-15 | 1.522 | 1.489 | 5.841446 | 6.891601 |
| 2016-11-15 | 1.462 | 1.317 | -3.942181 | -11.551377 |
| 2016-12-15 | 1.634 | 1.566 | 11.764706 | 18.906606 |
| 2017-01-15 | 1.620 | 1.593 | -0.856793 | 1.724138 |
| 2017-02-15 | 1.547 | 1.543 | -4.506173 | -3.138732 |
| 2017-03-15 | 1.492 | 1.523 | -3.555268 | -1.296176 |
| 2017-04-15 | 1.611 | 1.621 | 7.975871 | 6.434668 |
| 2017-05-15 | 1.540 | 1.519 | -4.407200 | -6.292412 |
| 2017-06-15 | 1.445 | 1.432 | -6.168831 | -5.727452 |
| 2017-07-15 | 1.562 | 1.518 | 8.096886 | 6.005587 |
| 2017-08-15 | 1.688 | 1.638 | 8.066581 | 7.905138 |
| 2017-09-15 | 1.867 | 1.753 | 10.604265 | 7.020757 |
| 2017-10-15 | 1.715 | 1.652 | -8.141403 | -5.761552 |
| 2017-11-15 | 1.830 | 1.757 | 6.705539 | 6.355932 |
| 2017-12-15 | 1.757 | 1.702 | -3.989071 | -3.130336 |
| 2018-01-15 | 1.899 | 1.857 | 8.081958 | 9.106933 |
| 2018-02-15 | 1.817 | 1.765 | -4.318062 | -4.954227 |
| 2018-03-15 | 1.834 | 1.820 | 0.935608 | 3.116147 |
| 2018-04-15 | 1.995 | 1.965 | 8.778626 | 7.967033 |
| 2018-05-15 | 2.129 | 2.091 | 6.716792 | 6.412214 |
| 2018-06-15 | 2.030 | 2.002 | -4.650070 | -4.256337 |
| 2018-07-15 | 2.074 | 2.043 | 2.167488 | 2.047952 |
| 2018-08-15 | 2.077 | 2.053 | 0.144648 | 0.489476 |
| 2018-09-15 | 2.093 | 2.045 | 0.770342 | -0.389674 |
| 2018-10-15 | 2.028 | 1.969 | -3.105590 | -3.716381 |
| 2018-11-15 | 1.625 | 1.546 | -19.871795 | -21.482986 |
| 2018-12-15 | 1.449 | 1.357 | -10.830769 | -12.225097 |
| 2019-01-15 | 1.425 | 1.353 | -1.656315 | -0.294768 |
392 rows × 4 columns
#Regular Gasoline
df_rGasoline['Los Angeles Reformulated RBOB Regular Gasoline Monthly Change']=df_rGasoline['Los Angeles Reformulated RBOB Regular Gasoline Spot Price (Dollars per Gallon)'].pct_change()*100
df_rGasoline['Los Angeles Reformulated RBOB Regular Gasoline Spot Price (Dollars per Gallon)']=df_rGasoline['Los Angeles Reformulated RBOB Regular Gasoline Spot Price (Dollars per Gallon)'].astype('float')
df_rGasoline.Date = pd.to_datetime(df_rGasoline.Date)
df_rGasoline.set_index('Date', inplace=True)
df_rGasoline
| 1 | Los Angeles Reformulated RBOB Regular Gasoline Spot Price (Dollars per Gallon) | Los Angeles Reformulated RBOB Regular Gasoline Monthly Change |
|---|---|---|
| Date | ||
| 2003-06-15 | 1.072 | NaN |
| 2003-07-15 | 0.965 | -9.981343 |
| 2003-08-15 | 1.315 | 36.269430 |
| 2003-09-15 | 0.949 | -27.832700 |
| 2003-10-15 | 0.996 | 4.952582 |
| 2003-11-15 | 1.020 | 2.409639 |
| 2003-12-15 | 0.959 | -5.980392 |
| 2004-01-15 | 1.079 | 12.513034 |
| 2004-02-15 | 1.344 | 24.559778 |
| 2004-03-15 | 1.323 | -1.562500 |
| 2004-04-15 | 1.426 | 7.785336 |
| 2004-05-15 | 1.649 | 15.638149 |
| 2004-06-15 | 1.457 | -11.643420 |
| 2004-07-15 | 1.467 | 0.686342 |
| 2004-08-15 | 1.388 | -5.385140 |
| 2004-09-15 | 1.481 | 6.700288 |
| 2004-10-15 | 1.642 | 10.871033 |
| 2004-11-15 | 1.404 | -14.494519 |
| 2004-12-15 | 1.185 | -15.598291 |
| 2005-01-15 | 1.379 | 16.371308 |
| 2005-02-15 | 1.545 | 12.037708 |
| 2005-03-15 | 1.718 | 11.197411 |
| 2005-04-15 | 1.829 | 6.461001 |
| 2005-05-15 | 1.600 | -12.520503 |
| 2005-06-15 | 1.745 | 9.062500 |
| 2005-07-15 | 1.882 | 7.851003 |
| 2005-08-15 | 2.142 | 13.815090 |
| 2005-09-15 | 2.278 | 6.349206 |
| 2005-10-15 | 1.902 | -16.505707 |
| 2005-11-15 | 1.577 | -17.087277 |
| ... | ... | ... |
| 2016-08-15 | 1.388 | -1.699717 |
| 2016-09-15 | 1.552 | 11.815562 |
| 2016-10-15 | 1.619 | 4.317010 |
| 2016-11-15 | 1.434 | -11.426807 |
| 2016-12-15 | 1.500 | 4.602510 |
| 2017-01-15 | 1.612 | 7.466667 |
| 2017-02-15 | 1.756 | 8.933002 |
| 2017-03-15 | 1.720 | -2.050114 |
| 2017-04-15 | 1.777 | 3.313953 |
| 2017-05-15 | 1.746 | -1.744513 |
| 2017-06-15 | 1.619 | -7.273769 |
| 2017-07-15 | 1.669 | 3.088326 |
| 2017-08-15 | 1.807 | 8.268424 |
| 2017-09-15 | 1.823 | 0.885445 |
| 2017-10-15 | 1.761 | -3.400987 |
| 2017-11-15 | 1.836 | 4.258944 |
| 2017-12-15 | 1.646 | -10.348584 |
| 2018-01-15 | 1.944 | 18.104496 |
| 2018-02-15 | 1.877 | -3.446502 |
| 2018-03-15 | 2.080 | 10.815131 |
| 2018-04-15 | 2.213 | 6.394231 |
| 2018-05-15 | 2.295 | 3.705377 |
| 2018-06-15 | 2.136 | -6.928105 |
| 2018-07-15 | 2.140 | 0.187266 |
| 2018-08-15 | 2.078 | -2.897196 |
| 2018-09-15 | 2.224 | 7.025987 |
| 2018-10-15 | 2.254 | 1.348921 |
| 2018-11-15 | 1.739 | -22.848270 |
| 2018-12-15 | 1.597 | -8.165612 |
| 2019-01-15 | 1.607 | 0.626174 |
188 rows × 2 columns
#Heat Oil
df_heatOil['New York Harbor No. 2 Heating Oil Monthly Change']=df_heatOil['New York Harbor No. 2 Heating Oil Spot Price FOB (Dollars per Gallon)'].pct_change()*100
df_heatOil['New York Harbor No. 2 Heating Oil Spot Price FOB (Dollars per Gallon)']=df_heatOil['New York Harbor No. 2 Heating Oil Spot Price FOB (Dollars per Gallon)'].astype('float')
df_heatOil.Date = pd.to_datetime(df_heatOil.Date)
df_heatOil.set_index('Date', inplace=True)
df_heatOil
| 1 | New York Harbor No. 2 Heating Oil Spot Price FOB (Dollars per Gallon) | New York Harbor No. 2 Heating Oil Monthly Change |
|---|---|---|
| Date | ||
| 1986-06-15 | 0.380 | NaN |
| 1986-07-15 | 0.334 | -12.105263 |
| 1986-08-15 | 0.408 | 22.155689 |
| 1986-09-15 | 0.402 | -1.470588 |
| 1986-10-15 | 0.394 | -1.990050 |
| 1986-11-15 | 0.422 | 7.106599 |
| 1986-12-15 | 0.447 | 5.924171 |
| 1987-01-15 | 0.521 | 16.554810 |
| 1987-02-15 | 0.474 | -9.021113 |
| 1987-03-15 | 0.491 | 3.586498 |
| 1987-04-15 | 0.485 | -1.221996 |
| 1987-05-15 | 0.518 | 6.804124 |
| 1987-06-15 | 0.523 | 0.965251 |
| 1987-07-15 | 0.547 | 4.588910 |
| 1987-08-15 | 0.524 | -4.204753 |
| 1987-09-15 | 0.523 | -0.190840 |
| 1987-10-15 | 0.563 | 7.648184 |
| 1987-11-15 | 0.559 | -0.710480 |
| 1987-12-15 | 0.536 | -4.114490 |
| 1988-01-15 | 0.518 | -3.358209 |
| 1988-02-15 | 0.481 | -7.142857 |
| 1988-03-15 | 0.475 | -1.247401 |
| 1988-04-15 | 0.515 | 8.421053 |
| 1988-05-15 | 0.509 | -1.165049 |
| 1988-06-15 | 0.448 | -11.984283 |
| 1988-07-15 | 0.427 | -4.687500 |
| 1988-08-15 | 0.434 | 1.639344 |
| 1988-09-15 | 0.406 | -6.451613 |
| 1988-10-15 | 0.402 | -0.985222 |
| 1988-11-15 | 0.449 | 11.691542 |
| ... | ... | ... |
| 2016-08-15 | 1.325 | 2.554180 |
| 2016-09-15 | 1.351 | 1.962264 |
| 2016-10-15 | 1.488 | 10.140637 |
| 2016-11-15 | 1.390 | -6.586022 |
| 2016-12-15 | 1.553 | 11.726619 |
| 2017-01-15 | 1.551 | -0.128783 |
| 2017-02-15 | 1.562 | 0.709220 |
| 2017-03-15 | 1.492 | -4.481434 |
| 2017-04-15 | 1.523 | 2.077748 |
| 2017-05-15 | 1.454 | -4.530532 |
| 2017-06-15 | 1.332 | -8.390646 |
| 2017-07-15 | 1.423 | 6.831832 |
| 2017-08-15 | 1.519 | 6.746311 |
| 2017-09-15 | 1.708 | 12.442396 |
| 2017-10-15 | 1.707 | -0.058548 |
| 2017-11-15 | 1.823 | 6.795548 |
| 2017-12-15 | 1.863 | 2.194185 |
| 2018-01-15 | 2.017 | 8.266237 |
| 2018-02-15 | 1.853 | -8.130887 |
| 2018-03-15 | 1.874 | 1.133297 |
| 2018-04-15 | 2.037 | 8.697972 |
| 2018-05-15 | 2.187 | 7.363770 |
| 2018-06-15 | 2.112 | -3.429355 |
| 2018-07-15 | 2.108 | -0.189394 |
| 2018-08-15 | 2.125 | 0.806452 |
| 2018-09-15 | 2.225 | 4.705882 |
| 2018-10-15 | 2.313 | 3.955056 |
| 2018-11-15 | 2.033 | -12.105491 |
| 2018-12-15 | 1.784 | -12.247909 |
| 2019-01-15 | 1.819 | 1.961883 |
392 rows × 2 columns
#Diesel Fuel
df_diesel['New York Harbor Monthly Change']=df_diesel['New York Harbor Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)'].pct_change()*100
df_diesel['U.S. Gulf Coast Monthly Change']=df_diesel['U.S. Gulf Coast Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)'].pct_change()*100
df_diesel['Los Angeles, CA Monthly Change']=df_diesel['Los Angeles, CA Ultra-Low Sulfur CARB Diesel Spot Price (Dollars per Gallon)'].pct_change()*100
df_diesel['New York Harbor Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)']=df_diesel['New York Harbor Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)'].astype('float')
df_diesel['U.S. Gulf Coast Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)']=df_diesel['U.S. Gulf Coast Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)'].astype('float')
df_diesel['Los Angeles, CA Ultra-Low Sulfur CARB Diesel Spot Price (Dollars per Gallon)']=df_diesel['Los Angeles, CA Ultra-Low Sulfur CARB Diesel Spot Price (Dollars per Gallon)'].astype('float')
df_diesel.Date = pd.to_datetime(df_diesel.Date)
df_diesel.set_index('Date', inplace=True)
df_diesel
| 1 | New York Harbor Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon) | U.S. Gulf Coast Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon) | Los Angeles, CA Ultra-Low Sulfur CARB Diesel Spot Price (Dollars per Gallon) | New York Harbor Monthly Change | U.S. Gulf Coast Monthly Change | Los Angeles, CA Monthly Change |
|---|---|---|---|---|---|---|
| Date | ||||||
| 1996-04-15 | NaN | NaN | 0.950 | NaN | NaN | NaN |
| 1996-05-15 | NaN | NaN | 0.870 | NaN | NaN | -8.421053 |
| 1996-06-15 | NaN | NaN | 0.745 | NaN | NaN | -14.367816 |
| 1996-07-15 | NaN | NaN | 0.692 | NaN | NaN | -7.114094 |
| 1996-08-15 | NaN | NaN | 0.705 | NaN | NaN | 1.878613 |
| 1996-09-15 | NaN | NaN | 0.768 | NaN | NaN | 8.936170 |
| 1996-10-15 | NaN | NaN | 0.817 | NaN | NaN | 6.380208 |
| 1996-11-15 | NaN | NaN | 0.741 | NaN | NaN | -9.302326 |
| 1996-12-15 | NaN | NaN | 0.735 | NaN | NaN | -0.809717 |
| 1997-01-15 | NaN | NaN | 0.773 | NaN | NaN | 5.170068 |
| 1997-02-15 | NaN | NaN | 0.789 | NaN | NaN | 2.069858 |
| 1997-03-15 | NaN | NaN | 0.751 | NaN | NaN | -4.816223 |
| 1997-04-15 | NaN | NaN | 0.731 | NaN | NaN | -2.663116 |
| 1997-05-15 | NaN | NaN | 0.623 | NaN | NaN | -14.774282 |
| 1997-06-15 | NaN | NaN | 0.589 | NaN | NaN | -5.457464 |
| 1997-07-15 | NaN | NaN | 0.571 | NaN | NaN | -3.056027 |
| 1997-08-15 | NaN | NaN | 0.679 | NaN | NaN | 18.914186 |
| 1997-09-15 | NaN | NaN | 0.645 | NaN | NaN | -5.007364 |
| 1997-10-15 | NaN | NaN | 0.685 | NaN | NaN | 6.201550 |
| 1997-11-15 | NaN | NaN | 0.703 | NaN | NaN | 2.627737 |
| 1997-12-15 | NaN | NaN | 0.601 | NaN | NaN | -14.509246 |
| 1998-01-15 | NaN | NaN | 0.566 | NaN | NaN | -5.823627 |
| 1998-02-15 | NaN | NaN | 0.497 | NaN | NaN | -12.190813 |
| 1998-03-15 | NaN | NaN | 0.479 | NaN | NaN | -3.621730 |
| 1998-04-15 | NaN | NaN | 0.552 | NaN | NaN | 15.240084 |
| 1998-05-15 | NaN | NaN | 0.518 | NaN | NaN | -6.159420 |
| 1998-06-15 | NaN | NaN | 0.469 | NaN | NaN | -9.459459 |
| 1998-07-15 | NaN | NaN | 0.470 | NaN | NaN | 0.213220 |
| 1998-08-15 | NaN | NaN | 0.463 | NaN | NaN | -1.489362 |
| 1998-09-15 | NaN | NaN | 0.482 | NaN | NaN | 4.103672 |
| ... | ... | ... | ... | ... | ... | ... |
| 2016-08-15 | 1.399 | 1.379 | 1.400 | 2.116788 | 2.680566 | 0.000000 |
| 2016-09-15 | 1.418 | 1.400 | 1.455 | 1.358113 | 1.522843 | 3.928571 |
| 2016-10-15 | 1.559 | 1.544 | 1.630 | 9.943583 | 10.285714 | 12.027491 |
| 2016-11-15 | 1.463 | 1.427 | 1.539 | -6.157793 | -7.577720 | -5.582822 |
| 2016-12-15 | 1.642 | 1.596 | 1.656 | 12.235133 | 11.843027 | 7.602339 |
| 2017-01-15 | 1.623 | 1.593 | 1.671 | -1.157125 | -0.187970 | 0.905797 |
| 2017-02-15 | 1.624 | 1.608 | 1.670 | 0.061614 | 0.941620 | -0.059844 |
| 2017-03-15 | 1.528 | 1.502 | 1.563 | -5.911330 | -6.592040 | -6.407186 |
| 2017-04-15 | 1.586 | 1.558 | 1.639 | 3.795812 | 3.728362 | 4.862444 |
| 2017-05-15 | 1.514 | 1.482 | 1.547 | -4.539723 | -4.878049 | -5.613179 |
| 2017-06-15 | 1.417 | 1.386 | 1.460 | -6.406869 | -6.477733 | -5.623788 |
| 2017-07-15 | 1.521 | 1.486 | 1.570 | 7.339450 | 7.215007 | 7.534247 |
| 2017-08-15 | 1.631 | 1.601 | 1.697 | 7.232084 | 7.738896 | 8.089172 |
| 2017-09-15 | 1.791 | 1.777 | 1.915 | 9.809933 | 10.993129 | 12.846199 |
| 2017-10-15 | 1.799 | 1.756 | 1.863 | 0.446678 | -1.181767 | -2.715405 |
| 2017-11-15 | 1.916 | 1.851 | 1.975 | 6.503613 | 5.410023 | 6.011809 |
| 2017-12-15 | 1.944 | 1.867 | 1.907 | 1.461378 | 0.864398 | -3.443038 |
| 2018-01-15 | 2.074 | 1.996 | 2.056 | 6.687243 | 6.909480 | 7.813319 |
| 2018-02-15 | 1.937 | 1.895 | 1.963 | -6.605593 | -5.060120 | -4.523346 |
| 2018-03-15 | 1.930 | 1.895 | 1.996 | -0.361384 | 0.000000 | 1.681100 |
| 2018-04-15 | 2.079 | 2.029 | 2.154 | 7.720207 | 7.071240 | 7.915832 |
| 2018-05-15 | 2.230 | 2.181 | 2.288 | 7.263107 | 7.491375 | 6.220984 |
| 2018-06-15 | 2.146 | 2.102 | 2.185 | -3.766816 | -3.622192 | -4.501748 |
| 2018-07-15 | 2.140 | 2.098 | 2.177 | -0.279590 | -0.190295 | -0.366133 |
| 2018-08-15 | 2.153 | 2.114 | 2.205 | 0.607477 | 0.762631 | 1.286174 |
| 2018-09-15 | 2.252 | 2.210 | 2.293 | 4.598235 | 4.541154 | 3.990930 |
| 2018-10-15 | 2.332 | 2.282 | 2.369 | 3.552398 | 3.257919 | 3.314435 |
| 2018-11-15 | 2.048 | 1.962 | 2.055 | -12.178388 | -14.022787 | -13.254538 |
| 2018-12-15 | 1.801 | 1.687 | 1.793 | -12.060547 | -14.016310 | -12.749392 |
| 2019-01-15 | 1.846 | 1.774 | 1.812 | 2.498612 | 5.157084 | 1.059677 |
274 rows × 6 columns
#Jet Fuel
df_jet['U.S. Gulf Coast Monthly Change']=df_jet['U.S. Gulf Coast Kerosene-Type Jet Fuel Spot Price FOB (Dollars per Gallon)'].pct_change()*100
df_jet['U.S. Gulf Coast Kerosene-Type Jet Fuel Spot Price FOB (Dollars per Gallon)']=df_jet['U.S. Gulf Coast Kerosene-Type Jet Fuel Spot Price FOB (Dollars per Gallon)'].astype('float')
df_jet.Date = pd.to_datetime(df_jet.Date)
df_jet.set_index('Date', inplace=True)
df_jet
| 1 | U.S. Gulf Coast Kerosene-Type Jet Fuel Spot Price FOB (Dollars per Gallon) | U.S. Gulf Coast Monthly Change |
|---|---|---|
| Date | ||
| 1990-04-15 | 0.540 | NaN |
| 1990-05-15 | 0.515 | -4.629630 |
| 1990-06-15 | 0.494 | -4.077670 |
| 1990-07-15 | 0.535 | 8.299595 |
| 1990-08-15 | 0.791 | 47.850467 |
| 1990-09-15 | 1.012 | 27.939317 |
| 1990-10-15 | 1.196 | 18.181818 |
| 1990-11-15 | 0.971 | -18.812709 |
| 1990-12-15 | 0.803 | -17.301751 |
| 1991-01-15 | 0.741 | -7.721046 |
| 1991-02-15 | 0.637 | -14.035088 |
| 1991-03-15 | 0.558 | -12.401884 |
| 1991-04-15 | 0.552 | -1.075269 |
| 1991-05-15 | 0.569 | 3.079710 |
| 1991-06-15 | 0.547 | -3.866432 |
| 1991-07-15 | 0.586 | 7.129799 |
| 1991-08-15 | 0.623 | 6.313993 |
| 1991-09-15 | 0.635 | 1.926164 |
| 1991-10-15 | 0.671 | 5.669291 |
| 1991-11-15 | 0.645 | -3.874814 |
| 1991-12-15 | 0.522 | -19.069767 |
| 1992-01-15 | 0.509 | -2.490421 |
| 1992-02-15 | 0.543 | 6.679764 |
| 1992-03-15 | 0.514 | -5.340700 |
| 1992-04-15 | 0.543 | 5.642023 |
| 1992-05-15 | 0.579 | 6.629834 |
| 1992-06-15 | 0.615 | 6.217617 |
| 1992-07-15 | 0.611 | -0.650407 |
| 1992-08-15 | 0.590 | -3.436989 |
| 1992-09-15 | 0.623 | 5.593220 |
| ... | ... | ... |
| 2016-08-15 | 1.295 | 1.808176 |
| 2016-09-15 | 1.319 | 1.853282 |
| 2016-10-15 | 1.457 | 10.462472 |
| 2016-11-15 | 1.356 | -6.932052 |
| 2016-12-15 | 1.491 | 9.955752 |
| 2017-01-15 | 1.514 | 1.542589 |
| 2017-02-15 | 1.547 | 2.179657 |
| 2017-03-15 | 1.445 | -6.593407 |
| 2017-04-15 | 1.510 | 4.498270 |
| 2017-05-15 | 1.412 | -6.490066 |
| 2017-06-15 | 1.295 | -8.286119 |
| 2017-07-15 | 1.417 | 9.420849 |
| 2017-08-15 | 1.561 | 10.162315 |
| 2017-09-15 | 1.800 | 15.310698 |
| 2017-10-15 | 1.659 | -7.833333 |
| 2017-11-15 | 1.760 | 6.088005 |
| 2017-12-15 | 1.817 | 3.238636 |
| 2018-01-15 | 1.952 | 7.429829 |
| 2018-02-15 | 1.849 | -5.276639 |
| 2018-03-15 | 1.858 | 0.486750 |
| 2018-04-15 | 2.015 | 8.449946 |
| 2018-05-15 | 2.155 | 6.947891 |
| 2018-06-15 | 2.090 | -3.016241 |
| 2018-07-15 | 2.098 | 0.382775 |
| 2018-08-15 | 2.118 | 0.953289 |
| 2018-09-15 | 2.189 | 3.352219 |
| 2018-10-15 | 2.249 | 2.740978 |
| 2018-11-15 | 1.945 | -13.517119 |
| 2018-12-15 | 1.696 | -12.802057 |
| 2019-01-15 | 1.784 | 5.188679 |
346 rows × 2 columns
#Propane
df_prop['Mont Belvieu, TX Propane Monthly Change']=df_prop['Mont Belvieu, TX Propane Spot Price FOB (Dollars per Gallon)'].pct_change()*100
df_prop['Mont Belvieu, TX Propane Spot Price FOB (Dollars per Gallon)']=df_prop['Mont Belvieu, TX Propane Spot Price FOB (Dollars per Gallon)'].astype('float')
df_prop.Date = pd.to_datetime(df_prop.Date)
df_prop.set_index('Date', inplace=True)
df_prop
| 1 | Mont Belvieu, TX Propane Spot Price FOB (Dollars per Gallon) | Mont Belvieu, TX Propane Monthly Change |
|---|---|---|
| Date | ||
| 1992-06-15 | 0.344 | NaN |
| 1992-07-15 | 0.342 | -0.581395 |
| 1992-08-15 | 0.354 | 3.508772 |
| 1992-09-15 | 0.375 | 5.932203 |
| 1992-10-15 | 0.354 | -5.600000 |
| 1992-11-15 | 0.327 | -7.627119 |
| 1992-12-15 | 0.312 | -4.587156 |
| 1993-01-15 | 0.337 | 8.012821 |
| 1993-02-15 | 0.330 | -2.077151 |
| 1993-03-15 | 0.342 | 3.636364 |
| 1993-04-15 | 0.344 | 0.584795 |
| 1993-05-15 | 0.328 | -4.651163 |
| 1993-06-15 | 0.328 | 0.000000 |
| 1993-07-15 | 0.314 | -4.268293 |
| 1993-08-15 | 0.305 | -2.866242 |
| 1993-09-15 | 0.299 | -1.967213 |
| 1993-10-15 | 0.293 | -2.006689 |
| 1993-11-15 | 0.275 | -6.143345 |
| 1993-12-15 | 0.245 | -10.909091 |
| 1994-01-15 | 0.263 | 7.346939 |
| 1994-02-15 | 0.290 | 10.266160 |
| 1994-03-15 | 0.284 | -2.068966 |
| 1994-04-15 | 0.289 | 1.760563 |
| 1994-05-15 | 0.296 | 2.422145 |
| 1994-06-15 | 0.288 | -2.702703 |
| 1994-07-15 | 0.292 | 1.388889 |
| 1994-08-15 | 0.300 | 2.739726 |
| 1994-09-15 | 0.299 | -0.333333 |
| 1994-10-15 | 0.324 | 8.361204 |
| 1994-11-15 | 0.344 | 6.172840 |
| ... | ... | ... |
| 2016-08-15 | 0.449 | -6.066946 |
| 2016-09-15 | 0.495 | 10.244989 |
| 2016-10-15 | 0.573 | 15.757576 |
| 2016-11-15 | 0.538 | -6.108202 |
| 2016-12-15 | 0.637 | 18.401487 |
| 2017-01-15 | 0.747 | 17.268446 |
| 2017-02-15 | 0.768 | 2.811245 |
| 2017-03-15 | 0.615 | -19.921875 |
| 2017-04-15 | 0.651 | 5.853659 |
| 2017-05-15 | 0.640 | -1.689708 |
| 2017-06-15 | 0.590 | -7.812500 |
| 2017-07-15 | 0.647 | 9.661017 |
| 2017-08-15 | 0.758 | 17.156105 |
| 2017-09-15 | 0.883 | 16.490765 |
| 2017-10-15 | 0.934 | 5.775764 |
| 2017-11-15 | 0.980 | 4.925054 |
| 2017-12-15 | 0.957 | -2.346939 |
| 2018-01-15 | 0.904 | -5.538140 |
| 2018-02-15 | 0.827 | -8.517699 |
| 2018-03-15 | 0.788 | -4.715840 |
| 2018-04-15 | 0.821 | 4.187817 |
| 2018-05-15 | 0.917 | 11.693057 |
| 2018-06-15 | 0.880 | -4.034896 |
| 2018-07-15 | 0.938 | 6.590909 |
| 2018-08-15 | 0.974 | 3.837953 |
| 2018-09-15 | 1.057 | 8.521561 |
| 2018-10-15 | 0.959 | -9.271523 |
| 2018-11-15 | 0.745 | -22.314911 |
| 2018-12-15 | 0.677 | -9.127517 |
| 2019-01-15 | 0.665 | -1.772526 |
320 rows × 2 columns
def getYearlySummary(data):
return data.resample('Y').mean()
#Crude Oil Yearly Summarized Table
getYearlySummary(df_crudeOil)
| 1 | Cushing, OK WTI Spot Price FOB (Dollars per Barrel) | Europe Brent Spot Price FOB (Dollars per Barrel) | Cushing, OK WTI Monthly Change | Europe Brent Monthly Change |
|---|---|---|---|---|
| Date | ||||
| 1986-12-31 | 15.036667 | NaN | -1.711560 | NaN |
| 1987-12-31 | 19.171667 | 18.522500 | 0.780099 | -1.146253 |
| 1988-12-31 | 15.982500 | 14.948333 | -0.239611 | -0.582507 |
| 1989-12-31 | 19.640833 | 18.251667 | 2.261132 | 2.389460 |
| 1990-12-31 | 24.467500 | 23.675833 | 3.418130 | 4.598114 |
| 1991-12-31 | 21.502500 | 20.010833 | -2.465033 | -3.154075 |
| 1992-12-31 | 20.563333 | 19.305833 | 0.044119 | -0.033064 |
| 1993-12-31 | 18.450833 | 17.042500 | -2.264988 | -2.190267 |
| 1994-12-31 | 17.191667 | 15.840833 | 1.548474 | 1.380291 |
| 1995-12-31 | 18.439167 | 17.044167 | 0.965250 | 1.116250 |
| 1996-12-31 | 22.113333 | 20.635000 | 2.571583 | 2.548812 |
| 1997-12-31 | 20.610000 | 19.118333 | -2.457018 | -2.450318 |
| 1998-12-31 | 14.446667 | 12.779167 | -3.714227 | -4.232521 |
| 1999-12-31 | 19.260833 | 17.846667 | 7.472825 | 8.671402 |
| 2000-12-31 | 30.300833 | 28.522500 | 1.143356 | 0.731548 |
| 2001-12-31 | 25.947500 | 24.450000 | -2.954396 | -2.215634 |
| 2002-12-31 | 26.115000 | 24.963333 | 3.781293 | 3.825545 |
| 2003-12-31 | 31.120833 | 28.882500 | 1.054619 | 0.772481 |
| 2004-12-31 | 41.443333 | 38.230000 | 2.789403 | 2.763941 |
| 2005-12-31 | 56.492500 | 54.418333 | 2.920053 | 3.355490 |
| 2006-12-31 | 66.018333 | 65.146667 | 0.573744 | 1.078917 |
| 2007-12-31 | 72.318333 | 72.465000 | 3.544767 | 3.472133 |
| 2008-12-31 | 99.571667 | 96.847500 | -5.339015 | -5.528531 |
| 2009-12-31 | 61.654167 | 61.490000 | 5.530528 | 5.619015 |
| 2010-12-31 | 79.395000 | 79.511667 | 1.657553 | 1.853841 |
| 2011-12-31 | 94.874167 | 111.264167 | 1.107554 | 1.523111 |
| 2012-12-31 | 94.110833 | 111.651667 | -0.780515 | 0.354714 |
| 2013-12-31 | 97.905833 | 108.637500 | 0.983733 | 0.155575 |
| 2014-12-31 | 93.258333 | 99.023333 | -3.784017 | -4.448734 |
| 2015-12-31 | 48.688333 | 52.353333 | -3.214574 | -3.321777 |
| 2016-12-31 | 43.144167 | 43.548333 | 3.396200 | 3.447007 |
| 2017-12-31 | 50.884167 | 54.247500 | 1.020199 | 1.715749 |
| 2018-12-31 | 64.938333 | 71.060833 | -0.948189 | -0.580773 |
| 2019-12-31 | 51.380000 | 59.410000 | 3.756058 | 3.573919 |
#Conventional Gasoline Yearly Summarized Table
getYearlySummary(df_cGasoline)
| 1 | New York Harbor Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon) | U.S. Gulf Coast Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon) | New York Harbor Conventional Gasoline Monthly Change | U.S. Gulf Coast Conventional Gasoline Monthly Change |
|---|---|---|---|---|
| Date | ||||
| 1986-12-31 | 0.408714 | 0.397143 | 1.382851 | 0.976929 |
| 1987-12-31 | 0.515250 | 0.505333 | 0.587246 | 0.621413 |
| 1988-12-31 | 0.493000 | 0.481667 | 0.566859 | 0.371622 |
| 1989-12-31 | 0.569667 | 0.557417 | 1.724462 | 2.252828 |
| 1990-12-31 | 0.728250 | 0.707500 | 2.908794 | 2.803443 |
| 1991-12-31 | 0.662083 | 0.634917 | -1.414225 | -1.542236 |
| 1992-12-31 | 0.590833 | 0.574667 | -0.225881 | 0.003469 |
| 1993-12-31 | 0.515500 | 0.508583 | -2.613259 | -2.505896 |
| 1994-12-31 | 0.490333 | 0.479417 | 2.163158 | 1.868256 |
| 1995-12-31 | 0.542583 | 0.509250 | 1.549958 | 1.319584 |
| 1996-12-31 | 0.618833 | 0.596417 | 2.389770 | 2.768112 |
| 1997-12-31 | 0.603667 | 0.585250 | -1.972954 | -1.957428 |
| 1998-12-31 | 0.427833 | 0.413250 | -3.994616 | -3.966470 |
| 1999-12-31 | 0.541750 | 0.519500 | 7.771594 | 7.561274 |
| 2000-12-31 | 0.859250 | 0.834333 | 1.204309 | 1.502782 |
| 2001-12-31 | 0.740500 | 0.739500 | -2.088952 | -1.696620 |
| 2002-12-31 | 0.722667 | 0.718583 | 4.079436 | 4.204147 |
| 2003-12-31 | 0.885167 | 0.871750 | 1.182203 | 1.258588 |
| 2004-12-31 | 1.179667 | 1.168667 | 2.091784 | 2.112571 |
| 2005-12-31 | 1.558667 | 1.589500 | 4.185396 | 4.565266 |
| 2006-12-31 | 1.821917 | 1.824000 | 1.097165 | 0.840040 |
| 2007-12-31 | 2.059333 | 2.037833 | 3.214617 | 3.416356 |
| 2008-12-31 | 2.448333 | 2.468750 | -5.804690 | -5.357999 |
| 2009-12-31 | 1.659083 | 1.629333 | 6.380316 | 6.547560 |
| 2010-12-31 | 2.092250 | 2.051083 | 1.989649 | 1.829971 |
| 2011-12-31 | 2.795000 | 2.745750 | 1.000851 | 1.013929 |
| 2012-12-31 | 2.939667 | 2.813667 | 0.559886 | 0.072831 |
| 2013-12-31 | 2.812917 | 2.696000 | 0.124165 | 0.298184 |
| 2014-12-31 | 2.612333 | 2.488083 | -3.666401 | -4.185907 |
| 2015-12-31 | 1.611750 | 1.553000 | -1.776860 | -0.977255 |
| 2016-12-31 | 1.390750 | 1.331750 | 2.527379 | 3.099277 |
| 2017-12-31 | 1.639500 | 1.604250 | 0.818700 | 0.841630 |
| 2018-12-31 | 1.920833 | 1.876083 | -1.265069 | -1.490412 |
| 2019-12-31 | 1.425000 | 1.353000 | -1.656315 | -0.294768 |
#Regular Gasoline Yearly Summarized Table
getYearlySummary(df_rGasoline)
| 1 | Los Angeles Reformulated RBOB Regular Gasoline Spot Price (Dollars per Gallon) | Los Angeles Reformulated RBOB Regular Gasoline Monthly Change |
|---|---|---|
| Date | ||
| 2003-12-31 | 1.039429 | -0.027131 |
| 2004-12-31 | 1.403750 | 2.505841 |
| 2005-12-31 | 1.766917 | 3.239223 |
| 2006-12-31 | 2.062583 | 1.665185 |
| 2007-12-31 | 2.291833 | 3.017479 |
| 2008-12-31 | 2.626583 | -4.946264 |
| 2009-12-31 | 1.840917 | 5.602993 |
| 2010-12-31 | 2.209667 | 1.691042 |
| 2011-12-31 | 2.894917 | 1.172236 |
| 2012-12-31 | 3.030583 | 0.137166 |
| 2013-12-31 | 2.921917 | 0.444020 |
| 2014-12-31 | 2.668083 | -4.196740 |
| 2015-12-31 | 1.945750 | 2.388995 |
| 2016-12-31 | 1.453833 | 1.019094 |
| 2017-12-31 | 1.731000 | 0.949733 |
| 2018-12-31 | 2.048083 | 0.274644 |
| 2019-12-31 | 1.607000 | 0.626174 |
#Heat Oil Yearly Summarized Table
getYearlySummary(df_heatOil)
| 1 | New York Harbor No. 2 Heating Oil Spot Price FOB (Dollars per Gallon) | New York Harbor No. 2 Heating Oil Monthly Change |
|---|---|---|
| Date | ||
| 1986-12-31 | 0.398143 | 3.270093 |
| 1987-12-31 | 0.522000 | 1.723675 |
| 1988-12-31 | 0.464500 | -0.140371 |
| 1989-12-31 | 0.557333 | 4.389577 |
| 1990-12-31 | 0.687833 | 1.049165 |
| 1991-12-31 | 0.612500 | -3.113607 |
| 1992-12-31 | 0.572333 | 0.513530 |
| 1993-12-31 | 0.524750 | -1.792132 |
| 1994-12-31 | 0.494000 | 1.086155 |
| 1995-12-31 | 0.493417 | 1.595513 |
| 1996-12-31 | 0.631000 | 2.246832 |
| 1997-12-31 | 0.563917 | -2.652192 |
| 1998-12-31 | 0.393000 | -3.764043 |
| 1999-12-31 | 0.488917 | 6.953559 |
| 2000-12-31 | 0.877500 | 3.605584 |
| 2001-12-31 | 0.712000 | -4.587341 |
| 2002-12-31 | 0.679500 | 4.021223 |
| 2003-12-31 | 0.850167 | 1.341614 |
| 2004-12-31 | 1.116500 | 3.347040 |
| 2005-12-31 | 1.621417 | 2.761157 |
| 2006-12-31 | 1.803333 | 0.091258 |
| 2007-12-31 | 2.031583 | 3.784936 |
| 2008-12-31 | 2.852750 | -4.043217 |
| 2009-12-31 | 1.641417 | 3.260586 |
| 2010-12-31 | 2.124833 | 1.998923 |
| 2011-12-31 | 2.946333 | 1.448881 |
| 2012-12-31 | 3.023750 | 0.448131 |
| 2013-12-31 | 2.925917 | 0.169685 |
| 2014-12-31 | 2.696417 | -3.869339 |
| 2015-12-31 | 1.546750 | -4.214499 |
| 2016-12-31 | 1.283500 | 3.756562 |
| 2017-12-31 | 1.579750 | 1.683941 |
| 2018-12-31 | 2.055667 | -0.097864 |
| 2019-12-31 | 1.819000 | 1.961883 |
#Diesel Yearly Summarized Table
getYearlySummary(df_diesel)
| 1 | New York Harbor Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon) | U.S. Gulf Coast Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon) | Los Angeles, CA Ultra-Low Sulfur CARB Diesel Spot Price (Dollars per Gallon) | New York Harbor Monthly Change | U.S. Gulf Coast Monthly Change | Los Angeles, CA Monthly Change |
|---|---|---|---|---|---|---|
| Date | ||||||
| 1996-12-31 | NaN | NaN | 0.780333 | NaN | NaN | -2.852502 |
| 1997-12-31 | NaN | NaN | 0.678333 | NaN | NaN | -1.275027 |
| 1998-12-31 | NaN | NaN | 0.485917 | NaN | NaN | -3.218516 |
| 1999-12-31 | NaN | NaN | 0.631000 | NaN | NaN | 6.840829 |
| 2000-12-31 | NaN | NaN | 0.977917 | NaN | NaN | 3.209220 |
| 2001-12-31 | NaN | NaN | 0.816000 | NaN | NaN | -4.702413 |
| 2002-12-31 | NaN | NaN | 0.749917 | NaN | NaN | 3.658444 |
| 2003-12-31 | NaN | NaN | 0.914667 | NaN | NaN | 1.809087 |
| 2004-12-31 | NaN | NaN | 1.317667 | NaN | NaN | 2.913440 |
| 2005-12-31 | NaN | NaN | 1.789083 | NaN | NaN | 3.346248 |
| 2006-12-31 | 1.971857 | 1.961857 | 2.082583 | -1.798454 | -2.307280 | 1.522250 |
| 2007-12-31 | 2.151417 | 2.145750 | 2.248167 | 3.254040 | 3.206231 | 2.210897 |
| 2008-12-31 | 2.973583 | 2.920333 | 2.908417 | -3.996835 | -4.069422 | -4.281085 |
| 2009-12-31 | 1.694417 | 1.659250 | 1.696417 | 3.071251 | 3.373839 | 3.777950 |
| 2010-12-31 | 2.194083 | 2.155917 | 2.204083 | 2.035142 | 1.951068 | 1.976082 |
| 2011-12-31 | 3.014417 | 2.969500 | 3.048750 | 1.482042 | 1.497582 | 1.452001 |
| 2012-12-31 | 3.113250 | 3.052917 | 3.106083 | 0.518776 | 0.401727 | 0.405499 |
| 2013-12-31 | 3.014000 | 2.967833 | 3.012750 | -0.017374 | 0.038096 | 0.158181 |
| 2014-12-31 | 2.804833 | 2.710833 | 2.785750 | -3.164201 | -3.875548 | -3.530439 |
| 2015-12-31 | 1.663333 | 1.579167 | 1.660333 | -4.231931 | -3.542397 | -2.949633 |
| 2016-12-31 | 1.351417 | 1.317500 | 1.386667 | 3.481512 | 3.589359 | 3.025174 |
| 2017-12-31 | 1.657833 | 1.622250 | 1.706417 | 1.552959 | 1.464490 | 1.365602 |
| 2018-12-31 | 2.093500 | 2.037583 | 2.127833 | -0.401971 | -0.573159 | -0.264365 |
| 2019-12-31 | 1.846000 | 1.774000 | 1.812000 | 2.498612 | 5.157084 | 1.059677 |
#Jet Fuel Yearly Summarized Table
getYearlySummary(df_jet)
| 1 | U.S. Gulf Coast Kerosene-Type Jet Fuel Spot Price FOB (Dollars per Gallon) | U.S. Gulf Coast Monthly Change |
|---|---|---|
| Date | ||
| 1990-12-31 | 0.761889 | 7.181180 |
| 1991-12-31 | 0.607167 | -3.160445 |
| 1992-12-31 | 0.571083 | 0.485898 |
| 1993-12-31 | 0.529333 | -1.445555 |
| 1994-12-31 | 0.493333 | 0.953750 |
| 1995-12-31 | 0.494333 | 1.184620 |
| 1996-12-31 | 0.610667 | 2.087636 |
| 1997-12-31 | 0.560083 | -2.616431 |
| 1998-12-31 | 0.403500 | -3.719445 |
| 1999-12-31 | 0.496083 | 7.536698 |
| 2000-12-31 | 0.848667 | 2.086970 |
| 2001-12-31 | 0.723667 | -3.964171 |
| 2002-12-31 | 0.685333 | 4.085589 |
| 2003-12-31 | 0.825667 | 1.195667 |
| 2004-12-31 | 1.149917 | 3.210222 |
| 2005-12-31 | 1.710167 | 3.758593 |
| 2006-12-31 | 1.920750 | 0.596269 |
| 2007-12-31 | 2.129917 | 3.228612 |
| 2008-12-31 | 2.961667 | -3.963651 |
| 2009-12-31 | 1.659250 | 3.491947 |
| 2010-12-31 | 2.146083 | 1.897716 |
| 2011-12-31 | 2.996083 | 1.453829 |
| 2012-12-31 | 3.056500 | 0.364183 |
| 2013-12-31 | 2.923417 | 0.130552 |
| 2014-12-31 | 2.696500 | -3.822657 |
| 2015-12-31 | 1.525500 | -3.639839 |
| 2016-12-31 | 1.249167 | 3.047348 |
| 2017-12-31 | 1.561417 | 1.936508 |
| 2018-12-31 | 2.017833 | -0.322365 |
| 2019-12-31 | 1.784000 | 5.188679 |
#Propane Yearly Summarized Table
getYearlySummary(df_prop)
| 1 | Mont Belvieu, TX Propane Spot Price FOB (Dollars per Gallon) | Mont Belvieu, TX Propane Monthly Change |
|---|---|---|
| Date | ||
| 1992-12-31 | 0.344000 | -1.492449 |
| 1993-12-31 | 0.311667 | -1.887934 |
| 1994-12-31 | 0.300167 | 2.679649 |
| 1995-12-31 | 0.320417 | 0.608008 |
| 1996-12-31 | 0.423250 | 4.888069 |
| 1997-12-31 | 0.374083 | -4.409562 |
| 1998-12-31 | 0.261250 | -3.674093 |
| 1999-12-31 | 0.340417 | 6.365428 |
| 2000-12-31 | 0.581583 | 5.788992 |
| 2001-12-31 | 0.472417 | -7.223847 |
| 2002-12-31 | 0.409333 | 5.028779 |
| 2003-12-31 | 0.577750 | 2.408560 |
| 2004-12-31 | 0.742583 | 2.313509 |
| 2005-12-31 | 0.912500 | 3.005146 |
| 2006-12-31 | 1.013417 | -0.553427 |
| 2007-12-31 | 1.207417 | 4.041372 |
| 2008-12-31 | 1.412083 | -6.365971 |
| 2009-12-31 | 0.841833 | 6.237997 |
| 2010-12-31 | 1.165500 | 0.899653 |
| 2011-12-31 | 1.463333 | 0.662267 |
| 2012-12-31 | 1.003917 | -4.133757 |
| 2013-12-31 | 1.000750 | 4.117555 |
| 2014-12-31 | 1.042000 | -5.824235 |
| 2015-12-31 | 0.457000 | -2.181722 |
| 2016-12-31 | 0.484417 | 4.811955 |
| 2017-12-31 | 0.764167 | 4.014253 |
| 2018-12-31 | 0.873917 | -2.390769 |
| 2019-12-31 | 0.665000 | -1.772526 |
#Functions needed for Exploratory Analysis
def getImportantMonths(data):
comments=[]
for i in data.columns:
comments.append(str(i))
comments.append("min: "+str(min(data[i].fillna(9999)))+" in "+str(data[data[i] == min(data[i].fillna(9999))].index.tolist()[0]))
comments.append("max: "+str(max(data[i].fillna(-1)))+" in "+str(data[data[i] == max(data[i].fillna(-1))].index.tolist()[0]))
return comments
def getDistribution(data):
for i in data.columns:
print("Distribution of "+str(i))
plt.hist(data[i])
plt.show()
Crude Oil Analysis
#Crude Oil Data Visualizations
trace0 = go.Scatter(
x = df_crudeOil.index,
y = df_crudeOil['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)'],
name = 'Cushing, OK WTI',
line = dict(
color = ('green'),
width = 2)
)
trace1 = go.Scatter(
x = df_crudeOil.index,
y = df_crudeOil['Europe Brent Spot Price FOB (Dollars per Barrel)'],
name = 'Europe Brent',
line = dict(
color = ('orange'),
width = 2,)
)
data = [trace0, trace1]
layout = dict(title = 'Crude Oil Analysis',
xaxis = dict(title = 'Year'),
yaxis = dict(title = 'Dollars per Barrel'),
)
fig = dict(data=data, layout=layout)
offline.iplot(fig, filename='styled-line')
df_crudeOil.describe()
| 1 | Cushing, OK WTI Spot Price FOB (Dollars per Barrel) | Europe Brent Spot Price FOB (Dollars per Barrel) | Cushing, OK WTI Monthly Change | Europe Brent Monthly Change |
|---|---|---|---|---|
| count | 397.000000 | 381.000000 | 396.000000 | 380.000000 |
| mean | 43.808791 | 46.000394 | 0.580215 | 0.703086 |
| std | 29.460501 | 33.013959 | 8.665466 | 8.972040 |
| min | 11.350000 | 9.820000 | -32.577410 | -26.725342 |
| 25% | 19.660000 | 18.670000 | -4.673542 | -4.804217 |
| 50% | 29.840000 | 30.200000 | 1.136758 | 0.799366 |
| 75% | 63.800000 | 67.490000 | 5.709207 | 6.337508 |
| max | 133.880000 | 132.720000 | 48.021680 | 58.241118 |
getImportantMonths(df_crudeOil)
['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)', 'min: 11.35 in 1998-12-15 00:00:00', 'max: 133.88 in 2008-06-15 00:00:00', 'Europe Brent Spot Price FOB (Dollars per Barrel)', 'min: 9.82 in 1998-12-15 00:00:00', 'max: 132.72 in 2008-07-15 00:00:00', 'Cushing, OK WTI Monthly Change', 'min: -32.57740950719581 in 1986-02-15 00:00:00', 'max: 48.021680216802174 in 1990-08-15 00:00:00', 'Europe Brent Monthly Change', 'min: -26.725342274378317 in 2008-11-15 00:00:00', 'max: 58.241118229470004 in 1990-08-15 00:00:00']
getDistribution(df_crudeOil)
Distribution of Cushing, OK WTI Spot Price FOB (Dollars per Barrel)
Distribution of Europe Brent Spot Price FOB (Dollars per Barrel)
Distribution of Cushing, OK WTI Monthly Change
Distribution of Europe Brent Monthly Change
cmap = sns.color_palette("Blues")
sns.heatmap(df_crudeOil.iloc[:,:2],cmap=cmap)
<matplotlib.axes._subplots.AxesSubplot at 0x228fddbc7b8>
Conventional Gasoline
trace0 = go.Scatter(
x = df_cGasoline.index,
y = df_cGasoline['New York Harbor Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)'],
name = 'New York Harbor',
line = dict(
color = ('green'),
width = 2)
)
trace1 = go.Scatter(
x = df_cGasoline.index,
y = df_cGasoline['U.S. Gulf Coast Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)'],
name = 'U.S. Gulf Coast',
line = dict(
color = ('orange'),
width = 2,)
)
data = [trace0, trace1]
layout = dict(title = 'Conventional Gasoline Analysis',
xaxis = dict(title = 'Year'),
yaxis = dict(title = 'Dollars per Gallon'),
)
fig = dict(data=data, layout=layout)
offline.iplot(fig, filename='styled-line')
df_cGasoline.describe()
| 1 | New York Harbor Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon) | U.S. Gulf Coast Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon) | New York Harbor Conventional Gasoline Monthly Change | U.S. Gulf Coast Conventional Gasoline Monthly Change |
|---|---|---|---|---|
| count | 392.000000 | 392.000000 | 391.000000 | 391.000000 |
| mean | 1.267541 | 1.238556 | 0.793295 | 0.858009 |
| std | 0.829709 | 0.813193 | 9.777725 | 10.480756 |
| min | 0.307000 | 0.294000 | -33.177083 | -43.084767 |
| 25% | 0.556750 | 0.548750 | -4.872642 | -5.501255 |
| 50% | 0.886500 | 0.863500 | 0.770342 | 1.016949 |
| 75% | 1.919250 | 1.866000 | 6.620545 | 6.704321 |
| max | 3.292000 | 3.284000 | 37.366003 | 43.853821 |
getImportantMonths(df_cGasoline)
['New York Harbor Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)', 'min: 0.307 in 1998-12-15 00:00:00', 'max: 3.292 in 2008-06-15 00:00:00', 'U.S. Gulf Coast Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)', 'min: 0.294 in 1998-12-15 00:00:00', 'max: 3.284 in 2008-06-15 00:00:00', 'New York Harbor Conventional Gasoline Monthly Change', 'min: -33.177083333333336 in 2008-11-15 00:00:00', 'max: 37.36600306278712 in 1990-08-15 00:00:00', 'U.S. Gulf Coast Conventional Gasoline Monthly Change', 'min: -43.08476736775015 in 2008-10-15 00:00:00', 'max: 43.85382059800664 in 1990-08-15 00:00:00']
getDistribution(df_cGasoline)
Distribution of New York Harbor Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)
Distribution of U.S. Gulf Coast Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)
Distribution of New York Harbor Conventional Gasoline Monthly Change
Distribution of U.S. Gulf Coast Conventional Gasoline Monthly Change
cmap = sns.color_palette("Blues")
sns.heatmap(df_cGasoline.iloc[:,:2],cmap=cmap)
<matplotlib.axes._subplots.AxesSubplot at 0x228fdfb5f60>
Regular Gasoline
trace0 = go.Scatter(
x = df_rGasoline.index,
y = df_rGasoline['Los Angeles Reformulated RBOB Regular Gasoline Spot Price (Dollars per Gallon)'],
name = 'New York Harbor',
line = dict(
color = ('green'),
width = 2)
)
data = [trace0]
layout = dict(title = 'Regular Gasoline Analysis',
xaxis = dict(title = 'Year'),
yaxis = dict(title = 'Dollars per Gallon'),
)
fig = dict(data=data, layout=layout)
offline.iplot(fig, filename='styled-line')
df_rGasoline.describe()
| 1 | Los Angeles Reformulated RBOB Regular Gasoline Spot Price (Dollars per Gallon) | Los Angeles Reformulated RBOB Regular Gasoline Monthly Change |
|---|---|---|
| count | 188.000000 | 187.000000 |
| mean | 2.147021 | 0.962776 |
| std | 0.631317 | 12.348753 |
| min | 0.949000 | -40.329412 |
| 25% | 1.642000 | -6.344894 |
| 50% | 2.079000 | 1.097017 |
| 75% | 2.668500 | 7.790218 |
| max | 3.694000 | 57.525773 |
getImportantMonths(df_rGasoline)
['Los Angeles Reformulated RBOB Regular Gasoline Spot Price (Dollars per Gallon)', 'min: 0.949 in 2003-09-15 00:00:00', 'max: 3.694 in 2008-06-15 00:00:00', 'Los Angeles Reformulated RBOB Regular Gasoline Monthly Change', 'min: -40.32941176470588 in 2008-11-15 00:00:00', 'max: 57.525773195876305 in 2016-03-15 00:00:00']
getDistribution(df_rGasoline)
Distribution of Los Angeles Reformulated RBOB Regular Gasoline Spot Price (Dollars per Gallon)
Distribution of Los Angeles Reformulated RBOB Regular Gasoline Monthly Change
Heat Oil Analyses
#Heat Oil
trace0 = go.Scatter(
x = df_heatOil.index,
y = df_heatOil['New York Harbor No. 2 Heating Oil Spot Price FOB (Dollars per Gallon)'],
name = 'New York Harbor',
line = dict(
color = ('green'),
width = 2)
)
data = [trace0]
layout = dict(title = 'Heat Oil Analysis',
xaxis = dict(title = 'Year'),
yaxis = dict(title = 'Dollars per Gallon'),
)
fig = dict(data=data, layout=layout)
offline.iplot(fig, filename='styled-line')
df_heatOil.describe()
| 1 | New York Harbor No. 2 Heating Oil Spot Price FOB (Dollars per Gallon) | New York Harbor No. 2 Heating Oil Monthly Change |
|---|---|---|
| count | 392.000000 | 391.000000 |
| mean | 1.278319 | 0.761170 |
| std | 0.885156 | 8.591302 |
| min | 0.304000 | -23.928378 |
| 25% | 0.539750 | -4.399083 |
| 50% | 0.840000 | 0.806452 |
| 75% | 1.867000 | 5.610438 |
| max | 3.801000 | 41.807910 |
getImportantMonths(df_heatOil)
['New York Harbor No. 2 Heating Oil Spot Price FOB (Dollars per Gallon)', 'min: 0.304 in 1999-02-15 00:00:00', 'max: 3.801 in 2008-06-15 00:00:00', 'New York Harbor No. 2 Heating Oil Monthly Change', 'min: -23.928377645143794 in 2008-12-15 00:00:00', 'max: 41.807909604519764 in 1990-08-15 00:00:00']
getDistribution(df_heatOil)
Distribution of New York Harbor No. 2 Heating Oil Spot Price FOB (Dollars per Gallon)
Distribution of New York Harbor No. 2 Heating Oil Monthly Change
Diesel Oil Price Analysis
#Diesel Oil
trace0 = go.Scatter(
x = df_diesel.index,
y = df_diesel['New York Harbor Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)'],
name = 'New York Harbor',
line = dict(
color = ('rgb(228,26,28)'),
width = 2)
)
trace1 = go.Scatter(
x = df_diesel.index,
y = df_diesel['U.S. Gulf Coast Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)'],
name = 'U.S. Gulf Coast',
line = dict(
color = ('rgb(55,126,184)'),
width = 2,)
)
trace2 = go.Scatter(
x = df_diesel.index,
y = df_diesel['Los Angeles, CA Ultra-Low Sulfur CARB Diesel Spot Price (Dollars per Gallon)'],
name = 'Los Angeles, CA',
line = dict(
color = ('rgb(77,175,74)'),
width = 2,)
)
data = [trace0, trace1,trace2]
layout = dict(title = 'Diesel Oil Analysis',
xaxis = dict(title = 'Year'),
yaxis = dict(title = 'Dollars per Gallon'),
)
fig = dict(data=data, layout=layout)
offline.iplot(fig, filename='styled-line')
df_diesel.describe()
| 1 | New York Harbor Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon) | U.S. Gulf Coast Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon) | Los Angeles, CA Ultra-Low Sulfur CARB Diesel Spot Price (Dollars per Gallon) | New York Harbor Monthly Change | U.S. Gulf Coast Monthly Change | Los Angeles, CA Monthly Change |
|---|---|---|---|---|---|---|
| count | 152.000000 | 152.000000 | 274.000000 | 151.000000 | 151.000000 | 273.000000 |
| mean | 2.291855 | 2.244559 | 1.711139 | 0.229860 | 0.217588 | 0.687299 |
| std | 0.672420 | 0.667094 | 0.882817 | 7.785065 | 7.990918 | 9.590957 |
| min | 0.979000 | 0.958000 | 0.391000 | -25.325013 | -25.556158 | -25.016790 |
| 25% | 1.794750 | 1.768750 | 0.872250 | -3.889112 | -4.113479 | -5.023022 |
| 50% | 2.138000 | 2.105000 | 1.663000 | 0.885559 | 0.728700 | 1.267281 |
| 75% | 2.959000 | 2.914000 | 2.285500 | 4.884422 | 5.415227 | 6.220984 |
| max | 3.887000 | 3.851000 | 3.894000 | 19.736842 | 21.020819 | 53.708440 |
getImportantMonths(df_diesel)
['New York Harbor Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)', 'min: 0.979 in 2016-01-15 00:00:00', 'max: 3.887 in 2008-06-15 00:00:00', 'U.S. Gulf Coast Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)', 'min: 0.958 in 2016-01-15 00:00:00', 'max: 3.851 in 2008-06-15 00:00:00', 'Los Angeles, CA Ultra-Low Sulfur CARB Diesel Spot Price (Dollars per Gallon)', 'min: 0.391 in 1998-12-15 00:00:00', 'max: 3.894 in 2008-06-15 00:00:00', 'New York Harbor Monthly Change', 'min: -25.325013000520023 in 2008-12-15 00:00:00', 'max: 19.736842105263165 in 2009-06-15 00:00:00', 'U.S. Gulf Coast Monthly Change', 'min: -25.556158437330435 in 2008-12-15 00:00:00', 'max: 21.020819341840145 in 2009-06-15 00:00:00', 'Los Angeles, CA Monthly Change', 'min: -25.01678979180658 in 2008-10-15 00:00:00', 'max: 53.7084398976982 in 1999-03-15 00:00:00']
getDistribution(df_diesel)
Distribution of New York Harbor Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)
Distribution of U.S. Gulf Coast Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)
Distribution of Los Angeles, CA Ultra-Low Sulfur CARB Diesel Spot Price (Dollars per Gallon)
Distribution of New York Harbor Monthly Change
Distribution of U.S. Gulf Coast Monthly Change
Distribution of Los Angeles, CA Monthly Change
Jet Fuel Analysis
#Jet Fuel
trace0 = go.Scatter(
x = df_jet.index,
y = df_jet['U.S. Gulf Coast Kerosene-Type Jet Fuel Spot Price FOB (Dollars per Gallon)'],
name = 'U.S. Gulf Coast',
line = dict(
color = ('green'),
width = 2)
)
data = [trace0]
layout = dict(title = 'Jet Fuel Analysis',
xaxis = dict(title = 'Year'),
yaxis = dict(title = 'Dollars per Gallon'),
)
fig = dict(data=data, layout=layout)
offline.iplot(fig, filename='styled-line')
df_jet.describe()
| 1 | U.S. Gulf Coast Kerosene-Type Jet Fuel Spot Price FOB (Dollars per Gallon) | U.S. Gulf Coast Monthly Change |
|---|---|---|
| count | 346.000000 | 345.000000 |
| mean | 1.396757 | 0.740796 |
| std | 0.911256 | 8.852279 |
| min | 0.304000 | -31.407407 |
| 25% | 0.565000 | -4.032258 |
| 50% | 1.087000 | 0.904159 |
| 75% | 2.050000 | 6.088005 |
| max | 3.886000 | 47.850467 |
getImportantMonths(df_jet)
['U.S. Gulf Coast Kerosene-Type Jet Fuel Spot Price FOB (Dollars per Gallon)', 'min: 0.304 in 1998-12-15 00:00:00', 'max: 3.886 in 2008-07-15 00:00:00', 'U.S. Gulf Coast Monthly Change', 'min: -31.407407407407405 in 2008-10-15 00:00:00', 'max: 47.850467289719624 in 1990-08-15 00:00:00']
getDistribution(df_jet)
Distribution of U.S. Gulf Coast Kerosene-Type Jet Fuel Spot Price FOB (Dollars per Gallon)
Distribution of U.S. Gulf Coast Monthly Change
Propane Analysis
#Propane
trace0 = go.Scatter(
x = df_prop.index,
y = df_prop['Mont Belvieu, TX Propane Spot Price FOB (Dollars per Gallon)'],
name = 'Mont Belvieu, TX',
line = dict(
color = ('green'),
width = 2)
)
data = [trace0]
layout = dict(title = 'Propane Analysis',
xaxis = dict(title = 'Year'),
yaxis = dict(title = 'Dollars per Gallon'),
)
fig = dict(data=data, layout=layout)
offline.iplot(fig, filename='styled-line')
df_prop.describe()
| 1 | Mont Belvieu, TX Propane Spot Price FOB (Dollars per Gallon) | Mont Belvieu, TX Propane Monthly Change |
|---|---|---|
| count | 320.000000 | 319.000000 |
| mean | 0.712997 | 0.689603 |
| std | 0.379146 | 9.731611 |
| min | 0.209000 | -31.699346 |
| 25% | 0.372000 | -4.995965 |
| 50% | 0.612500 | 0.619469 |
| 75% | 0.968000 | 6.407099 |
| max | 1.862000 | 29.672897 |
getImportantMonths(df_prop)
['Mont Belvieu, TX Propane Spot Price FOB (Dollars per Gallon)', 'min: 0.209 in 1998-12-15 00:00:00', 'max: 1.862 in 2008-07-15 00:00:00', 'Mont Belvieu, TX Propane Monthly Change', 'min: -31.699346405228766 in 2008-10-15 00:00:00', 'max: 29.672897196261694 in 2000-01-15 00:00:00']
getDistribution(df_prop)
Distribution of Mont Belvieu, TX Propane Spot Price FOB (Dollars per Gallon)
Distribution of Mont Belvieu, TX Propane Monthly Change
Transforming the unit Barrel to Gallon price of Crude Oil dataset to make proper comparison between the other datasets
df_crudeOil['Cushing, OK WTI Spot Price FOB (Dollars per Gallon)']=df_crudeOil['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)'].apply(lambda x: x/31.5)
df_crudeOil['Europe Brent Spot Price FOB (Dollars per Gallon)']=df_crudeOil['Europe Brent Spot Price FOB (Dollars per Barrel)'].apply(lambda x: x/31.5)
df_crudeOil
| 1 | Cushing, OK WTI Spot Price FOB (Dollars per Barrel) | Europe Brent Spot Price FOB (Dollars per Barrel) | Cushing, OK WTI Monthly Change | Europe Brent Monthly Change | Cushing, OK WTI Spot Price FOB (Dollars per Gallon) | Europe Brent Spot Price FOB (Dollars per Gallon) |
|---|---|---|---|---|---|---|
| Date | ||||||
| 1986-01-15 | 22.93 | NaN | NaN | NaN | 0.727937 | NaN |
| 1986-02-15 | 15.46 | NaN | -32.577410 | NaN | 0.490794 | NaN |
| 1986-03-15 | 12.61 | NaN | -18.434670 | NaN | 0.400317 | NaN |
| 1986-04-15 | 12.84 | NaN | 1.823949 | NaN | 0.407619 | NaN |
| 1986-05-15 | 15.38 | NaN | 19.781931 | NaN | 0.488254 | NaN |
| 1986-06-15 | 13.43 | NaN | -12.678804 | NaN | 0.426349 | NaN |
| 1986-07-15 | 11.59 | NaN | -13.700670 | NaN | 0.367937 | NaN |
| 1986-08-15 | 15.10 | NaN | 30.284728 | NaN | 0.479365 | NaN |
| 1986-09-15 | 14.87 | NaN | -1.523179 | NaN | 0.472063 | NaN |
| 1986-10-15 | 14.90 | NaN | 0.201748 | NaN | 0.473016 | NaN |
| 1986-11-15 | 15.22 | NaN | 2.147651 | NaN | 0.483175 | NaN |
| 1986-12-15 | 16.11 | NaN | 5.847569 | NaN | 0.511429 | NaN |
| 1987-01-15 | 18.65 | NaN | 15.766605 | NaN | 0.592063 | NaN |
| 1987-02-15 | 17.75 | NaN | -4.825737 | NaN | 0.563492 | NaN |
| 1987-03-15 | 18.30 | NaN | 3.098592 | NaN | 0.580952 | NaN |
| 1987-04-15 | 18.68 | NaN | 2.076503 | NaN | 0.593016 | NaN |
| 1987-05-15 | 19.44 | 18.58 | 4.068522 | NaN | 0.617143 | 0.589841 |
| 1987-06-15 | 20.07 | 18.86 | 3.240741 | 1.506997 | 0.637143 | 0.598730 |
| 1987-07-15 | 21.34 | 19.86 | 6.327853 | 5.302227 | 0.677460 | 0.630476 |
| 1987-08-15 | 20.31 | 18.98 | -4.826617 | -4.431017 | 0.644762 | 0.602540 |
| 1987-09-15 | 19.53 | 18.31 | -3.840473 | -3.530032 | 0.620000 | 0.581270 |
| 1987-10-15 | 19.86 | 18.76 | 1.689708 | 2.457673 | 0.630476 | 0.595556 |
| 1987-11-15 | 18.85 | 17.78 | -5.085599 | -5.223881 | 0.598413 | 0.564444 |
| 1987-12-15 | 17.28 | 17.05 | -8.328912 | -4.105737 | 0.548571 | 0.541270 |
| 1988-01-15 | 17.13 | 16.75 | -0.868056 | -1.759531 | 0.543810 | 0.531746 |
| 1988-02-15 | 16.80 | 15.73 | -1.926445 | -6.089552 | 0.533333 | 0.499365 |
| 1988-03-15 | 16.20 | 14.73 | -3.571429 | -6.357279 | 0.514286 | 0.467619 |
| 1988-04-15 | 17.86 | 16.60 | 10.246914 | 12.695180 | 0.566984 | 0.526984 |
| 1988-05-15 | 17.42 | 16.31 | -2.463606 | -1.746988 | 0.553016 | 0.517778 |
| 1988-06-15 | 16.53 | 15.54 | -5.109070 | -4.721030 | 0.524762 | 0.493333 |
| ... | ... | ... | ... | ... | ... | ... |
| 2016-08-15 | 44.72 | 45.84 | 0.156775 | 1.979978 | 1.419683 | 1.455238 |
| 2016-09-15 | 45.18 | 46.57 | 1.028623 | 1.592496 | 1.434286 | 1.478413 |
| 2016-10-15 | 49.78 | 49.52 | 10.181496 | 6.334550 | 1.580317 | 1.572063 |
| 2016-11-15 | 45.66 | 44.73 | -8.276416 | -9.672859 | 1.449524 | 1.420000 |
| 2016-12-15 | 51.97 | 53.31 | 13.819536 | 19.181757 | 1.649841 | 1.692381 |
| 2017-01-15 | 52.50 | 54.58 | 1.019819 | 2.382292 | 1.666667 | 1.732698 |
| 2017-02-15 | 53.47 | 54.87 | 1.847619 | 0.531330 | 1.697460 | 1.741905 |
| 2017-03-15 | 49.33 | 51.59 | -7.742659 | -5.977766 | 1.566032 | 1.637778 |
| 2017-04-15 | 51.06 | 52.31 | 3.506994 | 1.395619 | 1.620952 | 1.660635 |
| 2017-05-15 | 48.48 | 50.33 | -5.052879 | -3.785127 | 1.539048 | 1.597778 |
| 2017-06-15 | 45.18 | 46.37 | -6.806931 | -7.868071 | 1.434286 | 1.472063 |
| 2017-07-15 | 46.63 | 48.48 | 3.209385 | 4.550356 | 1.480317 | 1.539048 |
| 2017-08-15 | 48.04 | 51.70 | 3.023804 | 6.641914 | 1.525079 | 1.641270 |
| 2017-09-15 | 49.82 | 56.15 | 3.705246 | 8.607350 | 1.581587 | 1.782540 |
| 2017-10-15 | 51.58 | 57.51 | 3.532718 | 2.422084 | 1.637460 | 1.825714 |
| 2017-11-15 | 56.64 | 62.71 | 9.810004 | 9.041906 | 1.798095 | 1.990794 |
| 2017-12-15 | 57.88 | 64.37 | 2.189266 | 2.647106 | 1.837460 | 2.043492 |
| 2018-01-15 | 63.70 | 69.08 | 10.055287 | 7.317073 | 2.022222 | 2.193016 |
| 2018-02-15 | 62.23 | 65.32 | -2.307692 | -5.442965 | 1.975556 | 2.073651 |
| 2018-03-15 | 62.73 | 66.02 | 0.803471 | 1.071647 | 1.991429 | 2.095873 |
| 2018-04-15 | 66.25 | 72.11 | 5.611350 | 9.224477 | 2.103175 | 2.289206 |
| 2018-05-15 | 69.98 | 76.98 | 5.630189 | 6.753571 | 2.221587 | 2.443810 |
| 2018-06-15 | 67.87 | 74.41 | -3.015147 | -3.338529 | 2.154603 | 2.362222 |
| 2018-07-15 | 70.98 | 74.25 | 4.582290 | -0.215025 | 2.253333 | 2.357143 |
| 2018-08-15 | 68.06 | 72.53 | -4.113835 | -2.316498 | 2.160635 | 2.302540 |
| 2018-09-15 | 70.23 | 78.89 | 3.188363 | 8.768785 | 2.229524 | 2.504444 |
| 2018-10-15 | 70.75 | 81.03 | 0.740424 | 2.712638 | 2.246032 | 2.572381 |
| 2018-11-15 | 56.96 | 64.75 | -19.491166 | -20.091324 | 1.808254 | 2.055556 |
| 2018-12-15 | 49.52 | 57.36 | -13.061798 | -11.413127 | 1.572063 | 1.820952 |
| 2019-01-15 | 51.38 | 59.41 | 3.756058 | 3.573919 | 1.631111 | 1.886032 |
397 rows × 6 columns
Visualizing all time series data together
trace0 = go.Scatter(
x = df_crudeOil.index,
y = df_crudeOil['Cushing, OK WTI Spot Price FOB (Dollars per Gallon)'],
name = 'Cushing, OK WTI-Crude Oil',
line = dict(
color = ('rgb(166,206,227)'),
width = 2)
)
trace1 = go.Scatter(
x = df_crudeOil.index,
y = df_crudeOil['Europe Brent Spot Price FOB (Dollars per Gallon)'],
name = 'Europe Brent-Crude Oil',
line = dict(
color = ('rgb(31,120,180)'),
width = 2,)
)
trace2 = go.Scatter(
x = df_cGasoline.index,
y = df_cGasoline['New York Harbor Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)'],
name = 'New York Harbor-Conventional Gasoline',
line = dict(
color = ('rgb(178,223,138)'),
width = 2)
)
trace3 = go.Scatter(
x = df_cGasoline.index,
y = df_cGasoline['U.S. Gulf Coast Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)'],
name = 'U.S. Gulf Coast-Conventional Gasoline',
line = dict(
color = ('rgb(51,160,44)'),
width = 2,)
)
trace4 = go.Scatter(
x = df_rGasoline.index,
y = df_rGasoline['Los Angeles Reformulated RBOB Regular Gasoline Spot Price (Dollars per Gallon)'],
name = 'Los Angeles-RBOB Regular Gasoline',
line = dict(
color = ('rgb(255,255,153)'),
width = 2,)
)
trace5 = go.Scatter(
x = df_heatOil.index,
y = df_heatOil['New York Harbor No. 2 Heating Oil Spot Price FOB (Dollars per Gallon)'],
name = 'New York Harbor-Heating Oil',
line = dict(
color = ('rgb(177,89,40)'),
width = 2,)
)
trace6 = go.Scatter(
x = df_diesel.index,
y = df_diesel['New York Harbor Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)'],
name = 'New York Harbor-Diesel',
line = dict(
color = ('rgb(253,191,111)'),
width = 2,)
)
trace7 = go.Scatter(
x = df_diesel.index,
y = df_diesel['U.S. Gulf Coast Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)'],
name = 'U.S. Gulf Coast-Diesel',
line = dict(
color = ('rgb(255,127,0)'),
width = 2,)
)
trace8 = go.Scatter(
x = df_diesel.index,
y = df_diesel['Los Angeles, CA Ultra-Low Sulfur CARB Diesel Spot Price (Dollars per Gallon)'],
name = 'Los Angeles, CA-Diesel',
line = dict(
color = ('rgb(227,26,28)'),
width = 2,)
)
trace9 = go.Scatter(
x = df_jet.index,
y = df_jet['U.S. Gulf Coast Kerosene-Type Jet Fuel Spot Price FOB (Dollars per Gallon)'],
name = 'U.S. Gulf Coast-Jet Fuel',
line = dict(
color = ('rgb(251,154,153)'),
width = 2,)
)
trace10 = go.Scatter(
x = df_prop.index,
y = df_prop['Mont Belvieu, TX Propane Spot Price FOB (Dollars per Gallon)'],
name = 'Mont Belvieu, TX-Propane',
line = dict(
color = ('rgb(106,61,154)'),
width = 2,)
)
data = [trace0, trace1,trace2,trace3,trace4,trace5,trace6,trace7,trace8,trace9,trace10]
layout = dict(title = 'Analysis of All Pretroleum fuels',
xaxis = dict(title = 'Year'),
yaxis = dict(title = 'Dollars per Gallon'),
)
fig = dict(data=data, layout=layout)
offline.iplot(fig, filename='styled-line')
Creating heatmap to see the price distribution of products across the years
combined_data = pd.concat([df_crudeOil.iloc[:,4:], df_cGasoline.iloc[:,:2],df_rGasoline.iloc[:,:1],df_heatOil
.iloc[:,:1],df_diesel.iloc[:,:3],df_jet.iloc[:,:1],df_prop.iloc[:,:1]], axis=1, sort=False)
cmap = sns.color_palette("Blues")
sns.heatmap(combined_data,cmap=cmap)
<matplotlib.axes._subplots.AxesSubplot at 0x228fde920b8>
Plotting the coorelation plot to see relation between all the variables
combined_data.columns=[['Crushing Crude','Eurupe Crude','NY C Gas','Gulf Coast C Gas','LA R Gas','NY Heat','NY Diesel','Gulf Coast Diesel','LA Diesel','Gulf Coast Kerosene','TX Prop']]
g = sns.pairplot(combined_data)
#Montly Change HeatMap
combined_data_monthly_change = pd.concat([df_crudeOil.iloc[:,2:4], df_cGasoline.iloc[:,2:],df_rGasoline.iloc[:,1:],df_heatOil
.iloc[:,1:],df_diesel.iloc[:,3:],df_jet.iloc[:,1:],df_prop.iloc[:,1:]], axis=1, sort=False)
cmap = sns.color_palette("Blues")
sns.heatmap(combined_data_monthly_change,cmap=cmap)
<matplotlib.axes._subplots.AxesSubplot at 0x228832ab5c0>
g = sns.pairplot(df_cGasoline.iloc[:,:2])
np.corrcoef(df_cGasoline.iloc[:,0],df_cGasoline.iloc[:,1])
array([[1. , 0.99802942],
[0.99802942, 1. ]])
We can see strong positive coorelation between both the cities' prices for conventional Gasoline
def checkIfLowerThanPrior(prior,current):
to_check=current-prior
if ((to_check <0).sum() == to_check.size).astype(np.int)==1:
return True
else:
return False
#Checking for monthly data
timeframe=[]
for row in range(0,len(combined_data)):
if np.isnan(np.array(combined_data.iloc[row-1,:])).any() or np.isnan(np.array(combined_data.iloc[row,:])).any():
continue
else:
if checkIfLowerThanPrior(np.array(combined_data.iloc[row-1,:]),np.array(combined_data.iloc[row,:])):
timeframe.append(combined_data.index[row])
#Plot to represent the findings
print("Plot to represent the times when all Products prices were lower than their prior data")
X = pd.Series(timeframe).apply(lambda x: str(x).split('-')[1])
Y = pd.Series(timeframe).apply(lambda x: str(x).split('-')[0])
Z = np.ones(len(X))
plot_data = pd.DataFrame({'Month': X, 'Year': Y, 'True/False': Z})
data_pivoted = plot_data.pivot("Month", "Year", "True/False")
ax = sns.heatmap(data_pivoted)
plt.show()
Plot to represent the times when all Products prices were lower than their prior data
Predicting prices for Cushing, OK WTI Spot Price FOB in Barrels
#Check for stationarity of the time series
from statsmodels.tsa.stattools import adfuller
X = df_crudeOil['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)']
result = adfuller(X)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
ADF Statistic: -1.743708 p-value: 0.408745
As p-value > 0.05, we can say that the series is non stationary Lets take log of the Series and check if that is make it stationary
X = np.log(X)
result = adfuller(X)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
ADF Statistic: -1.964412 p-value: 0.302392
Again, our hull hypotheisis will be rejected in this case. The series stays to be non-stationary.
Let's check the Trend, seasonality and residual of the time series
As the series is non-stationary, we can apply S-ARIMA model for forecasting the prices of the of Crude Oil at Cushing
#Divide information into training and testing data
y = df_crudeOil[:'2010'].resample('MS').mean()
df_train=y[:'2010']
df_test=y['2010':]
#Decompose to see the trend and seasonality
decomposition = sm.tsa.seasonal_decompose(df_train['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)'], model='multiplicative')
fig = decomposition.plot()
plt.show()
Trend and observed graph show similar trend, it will be a multiplicative model.
Here, we try to get the best possible values p,q, r for our seasonal ARIMA model (that is which has the least AIC value)
p = d = q = range(0, 2)
pdq = list(itertools.product(p, d, q))
seasonal_pdq = [(x[0], x[1], x[2], 12) for x in list(itertools.product(p, d, q))]
min_val=9999999
comment=""
for param in pdq:
for param_seasonal in seasonal_pdq:
mod = sm.tsa.statespace.SARIMAX(df_train[['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)']],
order=param,
seasonal_order=param_seasonal,
enforce_stationarity=False,
enforce_invertibility=False)
results = mod.fit()
#print('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, results.aic))
if(int(results.aic)<min_val):
min_val=int(results.aic)
comment=('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, results.aic))
print(str("Minimum AIC value is obtained when: ")+comment)
Minimum AIC value is obtained when: ARIMA(1, 1, 1)x(0, 1, 1, 12)12 - AIC:1502.443817496256
Using the min AIC value found combinations of p,q,r for training S-ARIMA model
mod = sm.tsa.statespace.SARIMAX(df_train[['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)']],
order=(1, 1, 1),
seasonal_order=(0, 1, 1, 12),
enforce_stationarity=False,
enforce_invertibility=False)
results = mod.fit()
print(results.summary().tables[1])
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.5930 0.063 9.476 0.000 0.470 0.716
ma.L1 -0.1982 0.093 -2.133 0.033 -0.380 -0.016
ma.S.L12 -0.9565 0.059 -16.315 0.000 -1.071 -0.842
sigma2 12.9744 0.778 16.671 0.000 11.449 14.500
==============================================================================
results.plot_diagnostics(figsize=(16, 8))
plt.show()
pred = results.get_prediction(start=pd.to_datetime('2010-01-01'), dynamic=False)
pred_ci = pred.conf_int()
ax = y[['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)']].plot(label='Cushing, OK WTI Spot Price FOB (Dollars per Barrel)')
pred.predicted_mean.plot(ax=ax, label='One-step ahead Forecast', alpha=.7, figsize=(14, 7))
ax.fill_between(pred_ci.index,
pred_ci.iloc[:, 0],
pred_ci.iloc[:, 1], color='k', alpha=.2)
ax.set_xlabel('Date')
ax.set_ylabel('Dollars per Barrel')
plt.legend()
plt.show()
rms = sqrt(mean_squared_error(df_test[['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)']], pred.predicted_mean))
print("Root mean squered error: "+str(rms))
Root mean squered error: 5.168870748570945
The visualization shows that the predictation was a correctede made. Therefore, we move ahead for training the model now with the complete dataset.
mod = sm.tsa.statespace.SARIMAX(df_crudeOil[['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)']],
order=(1, 1, 1),
seasonal_order=(0, 1, 1, 12),
enforce_stationarity=False,
enforce_invertibility=False)
results = mod.fit()
print(results.summary().tables[1])
results.plot_diagnostics(figsize=(16, 8))
plt.show()
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.4611 0.078 5.940 0.000 0.309 0.613
ma.L1 -0.1083 0.102 -1.061 0.288 -0.308 0.092
ma.S.L12 -1.0000 166.972 -0.006 0.995 -328.259 326.259
sigma2 16.1854 2702.814 0.006 0.995 -5281.232 5313.603
==============================================================================
Again, everything seems good.
#Predict for next 6 months
pred = results.forecast(6)
pred=pd.DataFrame(pred)
date_list=['2019-02-15','2019-03-15','2019-04-15','2019-05-15','2019-06-15','2019-07-15']
pred.index=pd.to_datetime(date_list)
trace0 = go.Scatter(
x = df_crudeOil.index,
y = df_crudeOil['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)'],
name = 'Cushing, OK WTI-Crude Oil',
line = dict(
color = ('rgb(166,206,227)'),
width = 2)
)
trace1 = go.Scatter(
x = pred.index,
y = pred[0],
name = 'Forecasted',
line = dict(
color = ('blue'),
width = 2)
)
data = [trace0,trace1]
layout = dict(title = 'Crude Oil - Cushing, OK WTI Spot Price FOB Forecast for next 6 months',
xaxis = dict(title = 'Year'),
yaxis = dict(title = 'Dollars per Barrel'),
)
fig = dict(data=data, layout=layout)
offline.iplot(fig, filename='styled-line')
The forcasts seems to be aligned with it previous value
Predicting Price for Crude at Europe Bent in Barrels
#X=pd.DataFrame(df_crudeOil['Europe Brent Spot Price FOB (Dollars per Barrel)'])
X=[x for x in df_crudeOil['Europe Brent Spot Price FOB (Dollars per Barrel)'] if np.isnan(x)!=True]
result = adfuller(X)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
ADF Statistic: -2.025794 p-value: 0.275379
Series is non - stationary, p-value > 0.05
X = np.log(X)
result = adfuller(X)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
ADF Statistic: -1.642520 p-value: 0.460944
Log values of time series, also says that it is non-stationary
Predict the prices using seasonal ARIMA
Lets start with seeing the decomposition of data
y = df_crudeOil[:'2010'].resample('MS').mean()
y=y.dropna()
df_train=y[:'2010']
df_test=y['2010':]
df_train
#Decompose to see the trend and seasonality
decomposition = sm.tsa.seasonal_decompose(df_train['Europe Brent Spot Price FOB (Dollars per Barrel)'], model='multiplicative')
fig = decomposition.plot()
plt.show()
p = d = q = range(0, 2)
pdq = list(itertools.product(p, d, q))
seasonal_pdq = [(x[0], x[1], x[2], 12) for x in list(itertools.product(p, d, q))]
min_val=9999999
comment=""
for param in pdq:
for param_seasonal in seasonal_pdq:
mod = sm.tsa.statespace.SARIMAX(df_train[['Europe Brent Spot Price FOB (Dollars per Barrel)']],
order=param,
seasonal_order=param_seasonal,
enforce_stationarity=False,
enforce_invertibility=False)
results = mod.fit()
#print('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, results.aic))
if(int(results.aic)<min_val):
min_val=int(results.aic)
comment=('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, results.aic))
print(str("Minimum AIC value is obtained when: ")+comment)
Minimum AIC value is obtained when: ARIMA(1, 1, 1)x(0, 1, 1, 12)12 - AIC:1447.3098593150075
Similar to what we did before, we take the found combination and feed it to the SARIMA Model and its diagnostics
mod = sm.tsa.statespace.SARIMAX(df_train[['Europe Brent Spot Price FOB (Dollars per Barrel)']],
order=(1, 1, 1),
seasonal_order=(0, 1, 1, 12),
enforce_stationarity=False,
enforce_invertibility=False)
results = mod.fit()
print(results.summary().tables[1])
results.plot_diagnostics(figsize=(16, 8))
plt.show()
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.5162 0.081 6.379 0.000 0.358 0.675
ma.L1 -0.1372 0.113 -1.210 0.226 -0.359 0.085
ma.S.L12 -1.0376 0.087 -11.900 0.000 -1.209 -0.867
sigma2 13.8474 1.661 8.339 0.000 10.593 17.102
==============================================================================
Everything seems good, so we move ahead with the visualizing the predictions
pred = results.get_prediction(start=pd.to_datetime('2010-01-01'), dynamic=False)
pred_ci = pred.conf_int()
ax = y[['Europe Brent Spot Price FOB (Dollars per Barrel)']].plot(label='Europe Brent Spot Price FOB (Dollars per Barrel)')
pred.predicted_mean.plot(ax=ax, label='One-step ahead Forecast', alpha=.7, figsize=(14, 7))
ax.fill_between(pred_ci.index,
pred_ci.iloc[:, 0],
pred_ci.iloc[:, 1], color='k', alpha=.2)
ax.set_xlabel('Date')
ax.set_ylabel('Price in Barrels')
plt.legend()
plt.show()
rms = sqrt(mean_squared_error(df_test[['Europe Brent Spot Price FOB (Dollars per Barrel)']], pred.predicted_mean))
print("Root mean squered error: "+str(rms))
Root mean squered error: 4.681480507769938
The model could correctly get the trend and predict the results. Now, we will train the model with complete data.
mod = sm.tsa.statespace.SARIMAX(df_crudeOil[['Europe Brent Spot Price FOB (Dollars per Barrel)']],
order=(1, 1, 1),
seasonal_order=(0, 1, 1, 12),
enforce_stationarity=False,
enforce_invertibility=False)
results = mod.fit()
print(results.summary().tables[1])
results.plot_diagnostics(figsize=(16, 8))
plt.show()
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.3229 0.083 3.900 0.000 0.161 0.485
ma.L1 0.0303 0.102 0.296 0.767 -0.170 0.231
ma.S.L12 -1.0000 246.049 -0.004 0.997 -483.247 481.247
sigma2 17.7862 4376.580 0.004 0.997 -8560.153 8595.726
==============================================================================
All seems good, we therefore, visualize the forecasted data
#Predict for next 6 months
pred = results.forecast(6)
pred=pd.DataFrame(pred)
date_list=['2019-02-15','2019-03-15','2019-04-15','2019-05-15','2019-06-15','2019-07-15']
pred.index=pd.to_datetime(date_list)
trace0 = go.Scatter(
x = df_crudeOil.index,
y = df_crudeOil['Europe Brent Spot Price FOB (Dollars per Barrel)'],
name = 'Europe Brent - Crude Oil',
line = dict(
color = ('rgb(166,206,227)'),
width = 2)
)
trace1 = go.Scatter(
x = pred.index,
y = pred[0],
name = 'Forecasted',
line = dict(
color = ('blue'),
width = 2)
)
data = [trace0,trace1]
layout = dict(title = 'Crude Oil - Europe Brent Spot Price FOB Forecast for next 6 months',
xaxis = dict(title = 'Year'),
yaxis = dict(title = 'Dollars per Barrel'),
)
fig = dict(data=data, layout=layout)
offline.iplot(fig, filename='styled-line')